コード例 #1
0
ファイル: test_tab_reader.py プロジェクト: hephaex/orange3
    def test_read_and_save_attributes(self):
        """Check that variable attributes survive a write/read round trip.

        The sample's third header row carries ``key=value`` pairs; the
        resulting variable attributes are verified right after parsing and
        once more after the table has been written out and read back.
        """
        samplefile = """\
        Feature 1\tFeature 2\tClass 1\tClass 42
        d        \tM F      \td      \td
                 \ta=1 b=2 \tclass x=a\\ longer\\ string \tclass
        1.0      \tM        \t5      \trich
        """

        def verify(parsed):
            # The same expectations hold before and after the round trip.
            _, feature2, class1, _ = parsed.domain.variables
            self.assertIsInstance(feature2, DiscreteVariable)
            self.assertEqual(feature2.name, "Feature 2")
            self.assertEqual(feature2.attributes, {'a': 1, 'b': 2})
            self.assertIn(class1, parsed.domain.class_vars)
            self.assertIsInstance(class1, DiscreteVariable)
            self.assertEqual(class1.name, "Class 1")
            self.assertEqual(class1.attributes, {'x': 'a longer string'})

        original = read_tab_file(io.StringIO(samplefile))
        verify(original)

        # close() is replaced with a no-op so the buffer can still be read
        # after writing (presumably write_file closes the stream it is given).
        sink = io.StringIO()
        sink.close = lambda: None
        TabReader.write_file(sink, original)

        reloaded = read_tab_file(io.StringIO(sink.getvalue()))
        verify(reloaded)
コード例 #2
0
ファイル: test_tab_reader.py プロジェクト: odipus/orange3
    def test_read_and_save_attributes(self):
        """Verify header attributes on variables before and after re-saving.

        The table is parsed from an in-memory sample, checked, written with
        ``TabReader.write_file``, parsed again from the written text and
        checked against the very same expectations.
        """
        samplefile = """\
        Feature 1\tFeature 2\tClass 1\tClass 42
        d        \tM F      \td      \td
                 \ta=1 b=2 \tclass x=a\\ longer\\ string \tclass
        1.0      \tM        \t5      \trich
        """

        def assert_expected_domain(tab):
            # Exactly four variables are expected; only the middle two are
            # inspected.
            _first, second, third, _fourth = tab.domain.variables
            self.assertIsInstance(second, DiscreteVariable)
            self.assertEqual(second.name, "Feature 2")
            self.assertEqual(second.attributes, {'a': 1, 'b': 2})
            self.assertIn(third, tab.domain.class_vars)
            self.assertIsInstance(third, DiscreteVariable)
            self.assertEqual(third.name, "Class 1")
            self.assertEqual(third.attributes, {'x': 'a longer string'})

        table = read_tab_file(io.StringIO(samplefile))
        assert_expected_domain(table)

        # A no-op close() keeps the written text retrievable via getvalue()
        # (presumably the writer closes its output stream).
        written = io.StringIO()
        written.close = lambda: None
        TabReader.write_file(written, table)
        saved_text = written.getvalue()

        assert_expected_domain(read_tab_file(io.StringIO(saved_text)))
コード例 #3
0
ファイル: test_tab_reader.py プロジェクト: princesden/orange3
 def test_no_metadata(self):
     """Writing metadata for a table with empty attributes creates no file.

     After ``TabReader.write_table_metadata`` is called for a table whose
     ``attributes`` is an empty ``OrderedDict``, no ``<name>.metadata``
     file may exist next to the target path.
     """
     tempdir = tempfile.mkdtemp()
     try:
         table = Table("titanic")
         table.attributes = OrderedDict()
         fname = path.join(tempdir, "out.tab")
         TabReader.write_table_metadata(fname, table)
         self.assertFalse(path.isfile(fname + ".metadata"))
     finally:
         # Remove the scratch directory even when loading, writing or the
         # assertion fails, so failed runs do not leave directories behind.
         shutil.rmtree(tempdir)
コード例 #4
0
 def test_no_metadata(self):
     """No ``.metadata`` file appears when the table's attributes are empty."""
     workdir = tempfile.mkdtemp()
     try:
         target = path.join(workdir, "out.tab")
         self.data.attributes = OrderedDict()
         TabReader.write_table_metadata(target, self.data)
         metadata_written = path.isfile(target + ".metadata")
         self.assertFalse(metadata_written)
     finally:
         # The scratch directory is removed whether or not the check passed.
         shutil.rmtree(workdir)
コード例 #5
0
 def test_no_metadata(self):
     """Check that empty table attributes do not produce a metadata file."""
     scratch = tempfile.mkdtemp()
     try:
         out_name = path.join(scratch, "out.tab")
         sidecar = out_name + ".metadata"
         self.data.attributes = OrderedDict()
         TabReader.write_table_metadata(out_name, self.data)
         self.assertFalse(path.isfile(sidecar))
     finally:
         # Clean up the temporary directory regardless of the outcome.
         shutil.rmtree(scratch)
コード例 #6
0
 def test_had_metadata_now_there_is_none(self):
     """A metadata file written earlier is gone after the attribute is removed.

     The ``.metadata`` file must exist after writing with attribute ``a``
     set, and must no longer exist after writing again with ``a`` deleted.
     """
     workdir = tempfile.mkdtemp()
     try:
         target = path.join(workdir, "out.tab")
         sidecar = target + ".metadata"

         self.data.attributes["a"] = "aa"
         TabReader.write_table_metadata(target, self.data)
         self.assertTrue(path.isfile(sidecar))

         del self.data.attributes["a"]
         TabReader.write_table_metadata(target, self.data)
         self.assertFalse(path.isfile(sidecar))
     finally:
         # The scratch directory is removed whether or not the checks passed.
         shutil.rmtree(workdir)
コード例 #7
0
ファイル: test_tab_reader.py プロジェクト: acopar/orange3
 def test_metadata(self):
     """Writing metadata for a table with attributes creates a ``.metadata`` file."""
     workdir = tempfile.mkdtemp()
     try:
         table = Table("titanic")
         table.attributes = OrderedDict()
         # Insert the entries one by one, in this order, after the assignment.
         for key, value in (("a", "aa"), ("b", "bb")):
             table.attributes[key] = value
         target = path.join(workdir, "out.tab")
         TabReader.write_table_metadata(target, table)
         metadata_written = path.isfile(target + ".metadata")
         self.assertTrue(metadata_written)
     finally:
         # The scratch directory is removed whether or not the check passed.
         shutil.rmtree(workdir)
コード例 #8
0
 def test_metadata(self):
     """Check that a table carrying two attributes gets a metadata file."""
     scratch = tempfile.mkdtemp()
     try:
         titanic = Table("titanic")
         titanic.attributes = OrderedDict()
         # Entries are added after the assignment, in this order.
         for name, text in (("a", "aa"), ("b", "bb")):
             titanic.attributes[name] = text
         out_name = path.join(scratch, "out.tab")
         TabReader.write_table_metadata(out_name, titanic)
         self.assertTrue(path.isfile(out_name + ".metadata"))
     finally:
         # Clean up the temporary directory regardless of the outcome.
         shutil.rmtree(scratch)
コード例 #9
0
 def test_had_metadata_now_there_is_none(self):
     """Check the metadata file exists with attribute ``a`` and not without it."""
     scratch = tempfile.mkdtemp()
     try:
         out_name = path.join(scratch, "out.tab")

         def metadata_exists():
             # The metadata file sits next to the target with a fixed suffix.
             return path.isfile(out_name + ".metadata")

         self.data.attributes["a"] = "aa"
         TabReader.write_table_metadata(out_name, self.data)
         self.assertTrue(metadata_exists())

         del self.data.attributes["a"]
         TabReader.write_table_metadata(out_name, self.data)
         self.assertFalse(metadata_exists())
     finally:
         # Clean up the temporary directory regardless of the outcome.
         shutil.rmtree(scratch)
コード例 #10
0
ファイル: ow1ka.py プロジェクト: irgolic/orange3-educational
    def table_from_html(self, html):
        """Build a data table from the last ``<table>`` element of ``html``.

        Header and cell texts are written to an in-memory tab-separated
        buffer which is then parsed by ``TabReader``. The result is named
        after the text of the first ``<h2>`` in the body, keeping only the
        part after the first ``': '`` when that separator is present.

        Raises ``DataEmptyError`` when ``html`` contains no ``<table>`` and
        ``DataIsAnalError`` when it contains ``'<h2>Anal'`` or
        ``'div_analiza_'``.
        """
        soup = BeautifulSoup(html, 'html.parser')
        try:
            html_table = soup.find_all('table')[-1]
        except IndexError:
            raise DataEmptyError

        if '<h2>Anal' in html or 'div_analiza_' in html:
            raise DataIsAnalError

        def _header_row_strings(row):
            # Each titled header cell contributes its text once per column
            # it spans (colspan defaults to 1).
            cells = html_table.select(
                'thead tr:nth-of-type(%d) th[title]' % row)
            return chain.from_iterable(
                repeat(th.get_text(), int(th.get('colspan') or 1))
                for th in cells)

        # self.DATETIME_VAR (available when Paradata is enabled in 1ka UI)
        # should match this variable name format
        header = []
        for th1, th3 in zip(_header_row_strings(1), _header_row_strings(3)):
            # Append the third-row text in parentheses unless it merely
            # repeats the first-row text.
            suffix = '' if th3 == th1 else ' ({})'
            header.append(th1.rstrip(':') + suffix.format(th3.rstrip(':')))

        values = []
        for tr in html_table.select('tbody tr'):
            row_values = []
            for td in tr.select('td'):
                if 'data_uid' in td.get('class', ()):
                    continue
                if td.span is None:
                    # If no span, feature is a number or a text field
                    cell = td.get_text()
                elif td.contents[0].strip().startswith('-'):
                    # If have span, it's a number, but if negative,
                    # replace with NaN (written as an empty cell)
                    cell = ''
                else:
                    # Else if span, the number is its code, but we want its
                    # value: the span text without its first and last char
                    cell = td.span.get_text()[1:-1]
                row_values.append(cell)
            values.append(row_values)

        # Save parsed values into in-mem file for default values processing
        buffer = StringIO()
        writer = csv.writer(buffer, delimiter='\t')
        writer.writerow(header)
        writer.writerows(values)
        buffer.flush()
        buffer.seek(0)

        data = TabReader(buffer).read()

        heading = soup.select('body h2:nth-of-type(1)')[0].get_text()
        data.name = heading.split(': ', maxsplit=1)[-1]

        return data
コード例 #11
0
    def test_read_and_save_attributes(self):
        """Check that variable attributes survive write/read round trips.

        The attributes parsed from the sample header are verified right
        after parsing and after one save and reload. A ``path`` attribute
        is then added to the first class variable and must come back
        unchanged after a second save and reload.
        """
        samplefile = """\
        Feature 1\tFeature 2\tClass 1\tClass 42
        d        \tM F      \td      \td
                 \ta=1 b=2 \tclass x=a\\ longer\\ string \tclass
        1.0      \tM        \t5      \trich
        """

        def verify(parsed):
            # Shared expectations; hands back the first class variable so
            # the caller can modify it afterwards.
            _, feature2, class1, _ = parsed.domain.variables
            self.assertIsInstance(feature2, DiscreteVariable)
            self.assertEqual(feature2.name, "Feature 2")
            self.assertEqual(feature2.attributes, {"a": 1, "b": 2})
            self.assertIn(class1, parsed.domain.class_vars)
            self.assertIsInstance(class1, DiscreteVariable)
            self.assertEqual(class1.name, "Class 1")
            self.assertEqual(class1.attributes, {"x": "a longer string"})
            return class1

        def unclosable_buffer():
            # close() is replaced with a no-op so the buffer stays readable
            # after writing (presumably write_file closes its stream).
            stream = io.StringIO()
            stream.close = lambda: None
            return stream

        table = read_tab_file(io.StringIO(samplefile))
        verify(table)

        first_pass = unclosable_buffer()
        TabReader.write_file(first_pass, table)
        table = read_tab_file(io.StringIO(first_pass.getvalue()))
        class1 = verify(table)

        location = "/path/to/somewhere"
        class1.attributes["path"] = location
        second_pass = unclosable_buffer()
        TabReader.write_file(second_pass, table)
        second_pass.seek(0)

        table = read_tab_file(second_pass)
        _, _, class1, _ = table.domain.variables
        self.assertEqual(class1.attributes["path"], location)
コード例 #12
0
 def test_many_discrete(self):
     """Reading a discrete column with 30000 distinct values takes at most 2 s.

     The buffer holds a header named ``Poser`` with type ``d``, an empty
     third header row and 30000 distinct values ``K0`` .. ``K29999``.
     Raises ``AssertionError`` when reading takes longer than two seconds.
     """
     b = io.StringIO()
     b.write("Poser\nd\n\n")
     b.writelines("K" + str(i) + "\n" for i in range(30000))
     # perf_counter is monotonic, so the measurement cannot be skewed by
     # wall-clock adjustments the way time.time() can.
     start = time.perf_counter()
     TabReader(b).read()
     elapsed = time.perf_counter() - start
     if elapsed > 2:
         raise AssertionError(
             "reading 30000 discrete values took {:.2f} s "
             "(limit: 2 s)".format(elapsed))
コード例 #13
0
    def table_from_html(self, html):
        """Convert the last ``<table>`` found in ``html`` into a data table.

        The header and body cells are serialised as tab-separated text in
        memory and read back through ``TabReader``. The returned table's
        name is the text of the body's first ``<h2>``, reduced to what
        follows the first ``': '`` if that separator occurs.

        Raises ``DataEmptyError`` if there is no ``<table>`` in ``html`` and
        ``DataIsAnalError`` if ``html`` contains ``'<h2>Anal'`` or
        ``'div_analiza_'``.
        """
        soup = BeautifulSoup(html, 'html.parser')
        try:
            html_table = soup.find_all('table')[-1]
        except IndexError:
            raise DataEmptyError

        if '<h2>Anal' in html or 'div_analiza_' in html:
            raise DataIsAnalError

        def _header_row_strings(row):
            # Text of every titled header cell in the given header row,
            # repeated once per spanned column (colspan defaults to 1).
            selector = 'thead tr:nth-of-type(%d) th[title]' % row
            titled_cells = html_table.select(selector)
            return chain.from_iterable(
                repeat(th.get_text(), int(th.get('colspan') or 1))
                for th in titled_cells)

        def _column_name(th1, th3):
            # The third-row text is appended in parentheses unless it is
            # identical to the first-row text.
            template = '' if th3 == th1 else ' ({})'
            return th1.rstrip(':') + template.format(th3.rstrip(':'))

        def _cell_value(td):
            # If no span, feature is a number or a text field
            if td.span is None:
                return td.get_text()
            # If have span, it's a number, but if negative, replace with NaN
            # (written as an empty cell)
            if td.contents[0].strip().startswith('-'):
                return ''
            # Else if span, the number is its code, but we want its value:
            # the span text without its first and last character
            return td.span.get_text()[1:-1]

        # self.DATETIME_VAR (available when Paradata is enabled in 1ka UI)
        # should match this variable name format
        header = [_column_name(th1, th3)
                  for th1, th3 in zip(_header_row_strings(1),
                                      _header_row_strings(3))]
        values = [[_cell_value(td)
                   for td in tr.select('td')
                   if 'data_uid' not in td.get('class', ())]
                  for tr in html_table.select('tbody tr')]

        # Save parsed values into in-mem file for default values processing
        buffer = StringIO()
        writer = csv.writer(buffer, delimiter='\t')
        writer.writerow(header)
        writer.writerows(values)
        buffer.flush()
        buffer.seek(0)

        data = TabReader(buffer).read()

        first_heading = soup.select('body h2:nth-of-type(1)')[0]
        data.name = first_heading.get_text().split(': ', maxsplit=1)[-1]

        return data
コード例 #14
0
    def test_bad_data(self):
        """
        Send a tree predictor and data from a mismatching domain to the widget.

        A tree is trained with TreeLearner on a small table and sent to the
        predictors input. A second table is then sent to the data input; its
        domain resembles the original one, but the target has three different
        values instead of two. Sending that data happens inside
        ``excepthook_catch()``.
        GH-2129
        """
        filestr1 = """\
        age\tsex\tsurvived
        d\td\td
        \t\tclass
        adult\tmale\tyes
        adult\tfemale\tno
        child\tmale\tyes
        child\tfemale\tyes
        """
        filestr2 = """\
        age\tsex\tsurvived
        d\td\td
        \t\tclass
        adult\tmale\tyes
        adult\tfemale\tno
        child\tmale\tyes
        child\tfemale\tunknown
        """

        Variable._clear_all_caches()
        try:
            file1 = io.StringIO(filestr1)
            table = TabReader(file1).read()
            learner = TreeLearner()
            tree = learner(table)

            file2 = io.StringIO(filestr2)
            bad_table = TabReader(file2).read()

            self.send_signal(self.widget.Inputs.predictors, tree, 1)

            with excepthook_catch():
                self.send_signal(self.widget.Inputs.data, bad_table)
        finally:
            # Reset the caches even when the test fails, so that tests
            # expecting the standard titanic data still work afterwards.
            Variable._clear_all_caches()
コード例 #15
0
    def test_read_save_quoted(self):
        """Quoted meta values, one with an embedded tab, survive save and reload.

        The first meta column must hold the literally quoted strings both
        after parsing the sample and after writing the table and reading it
        back.
        """
        quoted = '''\
        S\tA
        s\td
        m\t
        """a"""\ti
        """b"""\tj
        """c\td"""\tk
        '''
        expected = ['"a"', '"b"', '"c\td"']

        def first_meta_column(tab):
            return tab.metas[:, 0].tolist()

        table = read_tab_file(io.StringIO(quoted))
        self.assertSequenceEqual(first_meta_column(table), expected)

        # close() is replaced with a no-op so getvalue() still works after
        # writing (presumably write_file closes the stream it is given).
        sink = io.StringIO()
        sink.close = lambda: None
        TabReader.write_file(sink, table)

        reloaded = read_tab_file(io.StringIO(sink.getvalue()))
        self.assertSequenceEqual(first_meta_column(reloaded), expected)
コード例 #16
0
ファイル: test_tab_reader.py プロジェクト: acopar/orange3
    def test_read_save_quoted(self):
        """Check that quoted string metas round-trip through write and read."""
        quoted = '''\
        S\tA
        s\td
        m\t
        """a"""\ti
        """b"""\tj
        """c\td"""\tk
        '''
        expected = ['"a"', '"b"', '"c\td"']

        parsed = read_tab_file(io.StringIO(quoted))
        parsed_metas = parsed.metas[:, 0].tolist()
        self.assertSequenceEqual(parsed_metas, expected)

        # A no-op close() keeps the written text retrievable afterwards
        # (presumably the writer closes its output stream).
        written = io.StringIO()
        written.close = lambda: None
        TabReader.write_file(written, parsed)
        saved_text = written.getvalue()

        reread = read_tab_file(io.StringIO(saved_text))
        reread_metas = reread.metas[:, 0].tolist()
        self.assertSequenceEqual(reread_metas, expected)
コード例 #17
0
    def test_sheets(self):
        """A reader over an in-memory text stream reports an empty ``sheets`` tuple."""
        # One character of "xd dbac" per line.
        contents = "\n".join("xd dbac")
        reader = TabReader(io.StringIO(contents))

        self.assertEqual(reader.sheets, ())
コード例 #18
0
def read_tab_file(filename):
    """Return the result of ``TabReader(filename).read()``."""
    reader = TabReader(filename)
    return reader.read()
コード例 #19
0
 def test_data_name(self):
     """The table loaded as ``'iris'`` and the one read from its ``__file__`` are both named ``'iris'``."""
     loaded = Table('iris')
     reread = TabReader(loaded.__file__).read()
     for table in (loaded, reread):
         self.assertEqual(table.name, 'iris')