예제 #1
0
    def test_t3(self):
        """Check the first TESTFILE2 instance in original and converted form.

        Three things are verified in order: the raw ``(indep, dep)`` pair
        from ``instances_original``, the direct conversion of the same
        tokens through ``ds.features``, and the record produced by
        ``instances_converted``.
        """
        ds = Dataset(TESTFILE2)

        # The raw instance is one token sequence plus a string label.
        indep, dep = next(iter(ds.instances_original()))
        assert indep == [['invincible', 'is', 'a', 'wonderful', 'movie', '.']]
        assert dep == "pos"

        # Check the low-level conversion first, on a hand-built copy of the
        # independent part of that instance.
        indep1 = [['invincible', 'is', 'a', 'wonderful', 'movie', '.']]
        indep1_conv = ds.features(indep1)
        logger.debug("Original  indep1=%r", indep1)
        logger.debug("Converted indep1=%r", indep1_conv)
        ngram1 = indep1_conv[0]
        assert len(ngram1) == 6
        assert ngram1[0] == 3543
        assert ngram1[1] == 9

        # The converting iterator must yield the same indices for the first
        # instance as the direct conversion above.
        rec = next(iter(ds.instances_converted(train=False, convert=True)))
        logger.debug("TESTFILE2 rec1=%r", rec)
        # Pass values as logging arguments instead of pre-formatting with
        # "%": pre-formatting raises TypeError for multi-element tuples.
        logger.debug("TESTFILE2 info=%r", ds.get_info())
        indep1_it, dep1_it = rec
        ngram1_it = indep1_it[0]
        logger.debug("TESTFILE2 dep_it=%r", dep1_it)
        assert len(ngram1_it) == 6
        assert ngram1_it[0] == 3543
        assert ngram1_it[1] == 9
        # The string label "pos" is expected to be converted to 1.
        assert dep1_it == 1
예제 #2
0
 def test_t4(self):
     """Smoke-test loading TESTFILE3 and reading its first original instance.

     There are no assertions; the test passes when the dataset can be
     constructed, its metadata and features accessed, and the first
     original instance fetched without raising.
     """
     ds = Dataset(TESTFILE3)
     logger.debug("TESTFILE3 attrs=%r",
                  ds.meta.get("featureInfo").get("attributes"))
     # NOTE(review): the previous comment here said the Features constructor
     # finishes the vocab and that a re-initialization is needed, but nothing
     # is re-initialized in this test - confirm whether that is still true.
     logger.debug("TESTFILE3 features=%r", ds.features)
     rec = next(iter(ds.instances_original()))
     logger.debug("TESTFILE3 rec1=%r", rec)
     # Pass values as logging arguments instead of pre-formatting with "%":
     # pre-formatting raises TypeError for multi-element tuples.
     logger.debug("TESTFILE3 info=%r", ds.get_info())
예제 #3
0
    def test_t5(self):
        """Check sequence conversion and batching for TESTFILE4.

        Verifies the shape and targets of the first converted instance, then
        the layout of a batch of size 2 with and without reshaping.
        """
        ds = Dataset(TESTFILE4)
        rec = next(iter(ds.instances_converted(train=False, convert=True)))

        indep, dep = rec
        # Pass values as logging arguments instead of pre-formatting with
        # "%": pre-formatting raises TypeError when the value is a
        # multi-element tuple, which a sequence such as indep could be.
        logger.debug("TESTFILE4: indep=%r", indep)
        logger.debug("TESTFILE4: dep=%r", dep)
        logger.debug("TESTFILE4 info=%r", ds.get_info())

        # The first instance is a sequence of 3 elements. Each element is
        # expected to carry 18 feature values, with one target per element.
        assert len(dep) == 3
        assert len(indep) == 3  # 3 elements in the sequence
        for element in indep:
            assert len(element) == 18

        # check if the class is actually ADJ for all three targets
        for target_idx in dep:
            assert ds.target.vocab.idx2string(target_idx) == "ADJ"

        # test getting batches in non-reshaped form
        batch1 = next(iter(ds.batches_converted(train=False,
                                                convert=True,
                                                batch_size=2,
                                                reshape=False)))
        assert len(batch1) == 2

        # test getting batches in reshaped form
        batch2 = next(iter(ds.batches_converted(train=False,
                                                convert=True,
                                                batch_size=2,
                                                reshape=True)))
        bindep, bdep = batch2
        # Reshaped: 18 entries on the independent side, each of length 2
        # (the batch size), and 2 entries on the dependent side.
        assert len(bindep) == 18
        assert len(bdep) == 2
        assert len(bindep[0]) == 2
예제 #4
0
 def test_t2(self):
     """Check feature count and the first converted instance of TESTFILE1."""
     ds = Dataset(TESTFILE1)
     assert ds.features.size() == 34

     rec = next(iter(ds.instances_converted(train=False, convert=True)))
     # Pass values as logging arguments instead of pre-formatting with "%":
     # rec is expected to be a pair, and '"%r" % rec' raises TypeError
     # ("not all arguments converted") if that pair is a tuple.
     logger.debug("TESTFILE1 info=%r", ds.get_info())
     logger.debug("TESTFILE1 rec1=%r", rec)

     # we expect rec to be a pair: indep and dep
     indep, dep = rec
     # the indep part has as many values as there are features here
     assert len(indep) == 34
     # the dep part is the encoding of a nominal class; the first instance
     # is expected to be encoded as 0
     assert dep == 0