def test_t7(self):
    """Check validation-set and batch accessors on a Dataset built from TESTFILE3.

    The dataset is split with conversion enabled, the original data kept,
    three validation instances and a fixed random seed. The expected
    values below are pinned to that configuration.
    """
    # Expected second validation instance, original and converted form.
    expected_val_orig = [[
        'you', 'think', 'this', 'place', 'is', 'nice',
        'VERB', 'DET',
        'a', 'a', 'a', 'a', 'a', 'a',
        '', 'nk', 'is', 'ce', '', 'ce',
        '', 'ink', '', 'ace', '', ''
    ], 'NOUN']
    expected_val_conv = [[
        13, 157, 25, 104, 12, 319,
        2, 5,
        2, 2, 2, 2, 2, 2,
        0, 151, 28, 14, 0, 14,
        0, 215, 0, 101, 0, 0
    ], 0]
    # Expected second instance of the first training batch, original and converted form.
    expected_train_orig = [[
        'Bill', 'Bradford', 'in', 'Credit', 'are', 'supposed',
        'PROPN', 'ADP',
        'Aa', 'Aa', 'a', 'Aa', 'a', 'a',
        'll', 'rd', '', 'it', '', 'ed',
        '', 'ord', '', 'dit', '', 'sed'
    ], 'NOUN']
    expected_train_conv = [[
        1210, 1495, 9, 796, 23, 3075,
        6, 3,
        3, 3, 2, 3, 2, 2,
        20, 54, 0, 86, 0, 2,
        0, 391, 0, 300, 0, 77
    ], 0]

    ds = Dataset(TESTFILE3)
    ds.split(convert=True, keep_orig=True, validation_size=3, random_seed=1)

    # Validation set in original format.
    val_original = ds.validation_set_orig()
    assert len(val_original) == 3
    assert val_original[1] == expected_val_orig

    # Validation set in converted format.
    val_converted = ds.validation_set_converted()
    assert len(val_converted) == 3
    assert val_converted[1] == expected_val_conv

    # Requested as a batch, the validation set is a pair: independent
    # part and dependent part.
    val_as_batch = ds.validation_set_converted(as_batch=True)
    assert len(val_as_batch) == 2
    indep_part, _dep_part = val_as_batch
    # One entry per feature, each holding the three validation values.
    assert len(indep_part) == ds.nFeatures
    assert len(indep_part[0]) == 3

    # Without reshaping, a batch of original data is simply a list of
    # instances in original format.
    first_orig_flat = next(iter(
        ds.batches_original(train=True, batch_size=4, reshape=False)))
    assert len(first_orig_flat) == 4
    assert first_orig_flat[1] == expected_train_orig

    # With reshaping, the batch is a pair whose first element is the
    # list of features.
    first_orig_shaped = next(iter(
        ds.batches_original(train=True, batch_size=4, reshape=True)))
    assert len(first_orig_shaped) == 2
    assert first_orig_shaped[0][0][1] == 'Bill'

    # Same two checks for converted batches.
    first_conv_flat = next(iter(
        ds.batches_converted(train=True, batch_size=4, reshape=False)))
    assert len(first_conv_flat) == 4
    # TODO: check why some indices changed between previously and now,
    # and whether these values are still correct!
    assert first_conv_flat[1] == expected_train_conv

    first_conv_shaped = next(iter(
        ds.batches_converted(train=True, batch_size=4, reshape=True)))
    assert len(first_conv_shaped) == 2
    assert first_conv_shaped[0][0][1] == 1210
def test_t6(self):
    """Check validation-set and batch accessors on a Dataset built from TESTFILE2.

    The dataset is split with conversion enabled, the original data kept,
    three validation instances and a fixed random seed. Each instance
    carries a single feature holding a token sequence; the expected
    values below are pinned to that configuration.
    """
    # Expected second validation instance, original and converted form.
    expected_val_orig = [[[
        'a', 'very', 'well-made', ',', 'funny', 'and', 'entertaining',
        'picture', '.'
    ]], 'pos']
    expected_val_conv = [[[5, 84, 1530, 4, 75, 6, 190, 175, 2]], 1]
    # Expected second instance of the first training batch (unreshaped).
    expected_train_orig = [[[
        'rife', 'with', 'nutty', 'cliches', 'and', 'far', 'too', 'much',
        'dialogue', '.'
    ]], 'neg']
    expected_train_conv = [[[
        6694, 17, 6469, 544, 6, 168, 51, 59, 237, 2
    ]], 0]
    # Expected rows for the same instance in the reshaped batches; here
    # the sequence is followed by filler entries ('' / 0).
    expected_row_orig = [
        'rife', 'with', 'nutty', 'cliches', 'and', 'far', 'too', 'much',
        'dialogue', '.',
        '', '', '', '', '', '', '', '', '', '', '', '', '',
        '', '', '', '', '', '', '', '', '', '', '', '', ''
    ]
    expected_row_conv = [
        6694, 17, 6469, 544, 6, 168, 51, 59, 237, 2,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ]

    ds = Dataset(TESTFILE2)
    ds.split(convert=True, keep_orig=True, validation_size=3, random_seed=1)

    # Validation set in original format.
    val_original = ds.validation_set_orig()
    assert len(val_original) == 3
    assert val_original[1] == expected_val_orig

    # Validation set in converted format.
    val_converted = ds.validation_set_converted()
    assert len(val_converted) == 3
    assert val_converted[1] == expected_val_conv

    # Requested as a batch, the validation set is a pair: independent
    # part and dependent part.
    val_as_batch = ds.validation_set_converted(as_batch=True)
    assert len(val_as_batch) == 2
    indep_part, _dep_part = val_as_batch
    # Only one feature, holding the three validation values.
    assert len(indep_part) == 1
    assert len(indep_part[0]) == 3

    # Without reshaping, a batch of original data is simply a list of
    # instances in original format.
    first_orig_flat = next(iter(
        ds.batches_original(train=True, batch_size=4, reshape=False)))
    assert len(first_orig_flat) == 4
    assert first_orig_flat[1] == expected_train_orig

    # With reshaping, the batch is a pair whose first element is the
    # list of features.
    first_orig_shaped = next(iter(
        ds.batches_original(train=True, batch_size=4, reshape=True)))
    assert len(first_orig_shaped) == 2
    assert first_orig_shaped[0][0][1] == expected_row_orig

    # Same two checks for converted batches.
    first_conv_flat = next(iter(
        ds.batches_converted(train=True, batch_size=4, reshape=False)))
    assert len(first_conv_flat) == 4
    assert first_conv_flat[1] == expected_train_conv

    first_conv_shaped = next(iter(
        ds.batches_converted(train=True, batch_size=4, reshape=True)))
    assert len(first_conv_shaped) == 2
    assert first_conv_shaped[0][0][1] == expected_row_conv