コード例 #1
0
    def test_t3(self):
        """Check conversion of the first TESTFILE2 instance.

        The first original instance must consist of one six-token n-gram as
        the independent part and "pos" as the dependent part.  That n-gram
        is then converted twice, once by calling ``ds.features`` directly
        and once by iterating ``ds.instances_converted(train=False,
        convert=True)``.  Both conversions must yield six values starting
        with 3543 and 9, and the converted dependent part must equal 1.
        """
        expected_tokens = ['invincible', 'is', 'a', 'wonderful', 'movie', '.']
        ds = Dataset(TESTFILE2)

        indep, dep = next(iter(ds.instances_original()))
        assert indep == [expected_tokens]
        assert dep == "pos"

        # Check the low-level conversion method first, using a hand-built
        # input rather than the instance object read from the dataset.
        indep1 = [list(expected_tokens)]
        indep1_conv = ds.features(indep1)
        logger.debug("Original  indep1=%r", indep1)
        logger.debug("Converted indep1=%r", indep1_conv)
        ngram1 = indep1_conv[0]
        assert len(ngram1) == 6
        assert ngram1[0] == 3543
        assert ngram1[1] == 9

        # The converting iterator must produce the same values for the
        # first record as the direct call above.
        rec = next(iter(ds.instances_converted(train=False, convert=True)))
        logger.debug("TESTFILE2 rec1=%r", rec)
        # Pass the value as a logging argument (not via "%") so it is only
        # formatted when DEBUG is enabled and a tuple value cannot break
        # the format string.
        logger.debug("TESTFILE2 info=%r", ds.get_info())
        indep1_it, dep1_it = rec
        ngram1_it = indep1_it[0]
        logger.debug("TESTFILE2 dep_it=%r", dep1_it)
        assert len(ngram1_it) == 6
        assert ngram1_it[0] == 3543
        assert ngram1_it[1] == 9
        assert dep1_it == 1
コード例 #2
0
 def test_t4(self):
     """Load TESTFILE3 and log what the dataset exposes.

     Logs the ``attributes`` entry of the ``featureInfo`` metadata, the
     ``ds.features`` object, the first record from
     ``ds.instances_original()`` and the result of ``ds.get_info()``.
     The statements here contain no assertions of their own; they fail
     only if one of these accesses raises.
     """
     ds = Dataset(TESTFILE3)
     logger.debug("TESTFILE3 attrs=%r",
                  ds.meta.get("featureInfo").get("attributes"))
     # Features constructor finishes the vocab, so we need to re-initialize
     # NOTE(review): no re-initialization is visible in the statements
     # below; confirm whether the comment above is stale.
     features = ds.features
     logger.debug("TESTFILE3 features=%r", features)
     it1 = iter(ds.instances_original())
     rec = next(it1)
     logger.debug("TESTFILE3 rec1=%r", rec)
     # Pass the value as a logging argument (not via "%") so it is only
     # formatted when DEBUG is enabled and a tuple value cannot break the
     # format string.
     logger.debug("TESTFILE3 info=%r", ds.get_info())