Ejemplo n.º 1
0
 def test_web_scraping(self):
     """Replay tutorial section "6. Web Scraping" and check each step.

     Splits the ``key`` column into ``page`` and ``top``, reorders the rows
     by those two columns, removes rows selected through numeric ranges on
     ``top`` and finally applies ``self.filter_expr_1`` to the ``line``
     column.  After each operation the expected summary text is checked
     with ``assertInResponse``.

     The ``{n}`` markers in the comments presumably refer to the numbered
     steps of the tutorial section being followed.
     """
     # Section "6. Web Scraping"
     # {1}, {2} - split 'key' on ':' and give the two new columns names
     self.project.split_column('key', separator=':')
     self.assertInResponse('Split 5409 cell(s) in column key')
     self.project.rename_column('key 1', 'page')
     self.assertInResponse('Rename column key 1 to page')
     self.project.rename_column('key 2', 'top')
     self.assertInResponse('Rename column key 2 to top')
     self.project.move_column('line', 'end')
     self.assertInResponse('Move column line to position 2')
     # {3} - sort by page, then top (both compared as numbers) and reorder
     self.project.sorting = facet.Sorting([
         {'column': 'page', 'valueType': 'number'},
         {'column': 'top',  'valueType': 'number'},
     ])
     self.project.reorder_rows()
     self.assertInResponse('Reorder rows')
     # After reordering, the first row is expected to be page 1, top 24.
     first_row = self.project.get_rows(limit=1).rows[0]
     self.assertEqual(first_row['page'], 1)
     self.assertEqual(first_row['top'], 24)
     # {4} - text filter on the 'line' column
     filter_facet = facet.TextFilterFacet('line', 'ahman')
     rows = self.project.get_rows(filter_facet).rows
     self.assertEqual(len(rows), 1)
     self.assertEqual(rows[0]['top'], 106)
     # Change the query on the same facet object and fetch again.
     # NOTE(review): no facet is passed to get_rows() here - presumably the
     # previous get_rows(filter_facet) call left the filter installed on
     # the project's engine; confirm against get_rows().
     filter_facet.query = 'alvarez'
     rows = self.project.get_rows().rows
     self.assertEqual(len(rows), 2)
     self.assertEqual(rows[-1]['top'], 567)
     # Presumably clears every facet from the engine before the next step.
     self.project.engine.remove_all()
     # {5} - tutorial says 'line'; it means 'top'
     line_facet = facet.NumericFacet('top')
     # First removal: only an upper bound of 100 is set on the facet.
     line_facet.to = 100
     self.project.remove_rows(line_facet)
     self.assertInResponse('Remove 775 rows')
     # Second removal: range 570..600.  'From' is capitalised because
     # 'from' is a reserved word in Python.
     line_facet.From = 570
     line_facet.to = 600
     self.project.remove_rows(line_facet)
     self.assertInResponse('Remove 71 rows')
     # With the facet reset, 4563 rows are expected to remain.
     line_facet.reset()
     response = self.project.get_rows()
     self.assertEqual(response.filtered, 4563)
     # {6} - facet on page number 1
     page_facet = facet.TextFacet('page', 1)   # 1 not '1'
     self.project.engine.add_facet(page_facet)
     # {7}
     rows = self.project.get_rows().rows
     # Look for a row with a name in it by skipping HTML
     name_row = [row for row in rows if '<b>' not in row['line']][0]
     self.assertTrue('WELLNESS' in name_row['line'])
     self.assertEqual(name_row['top'], 161)
     # Narrow 'top' to 20..160 and remove the matching rows.
     # NOTE(review): remove_rows() is called without a facet - presumably
     # it uses the facets currently installed on the engine (line_facet
     # and page_facet); confirm against remove_rows().
     line_facet.From = 20
     line_facet.to = 160
     self.project.remove_rows()
     self.assertInResponse('Remove 9 rows')
     self.project.engine.remove_all()
     # {8} - transform the 'line' column with the expression stored on the
     # test case as self.filter_expr_1 (defined outside this method)
     self.project.text_transform('line', expression=self.filter_expr_1)
     self.assertInResponse('Text transform on 4554 cells in column line')
Ejemplo n.º 2
0
 def setUp(self):
     """Prepare the project fixture before each test.

     Runs the base ``RefineTestCase.setUp``, checks that a ten-row page can
     be fetched, folds the two spellings of 'Council Member' into one,
     stars the rows whose clustered candidate name ends in ', ' and then
     calls ``remove_rows()`` (presumably removing the starred rows).
     """
     refinetest.RefineTestCase.setUp(self)

     # Paging sanity check: ask for ten rows, expect ten back.
     page = self.project.get_rows(limit=10)
     self.assertEqual(10, len(page.rows))
     self.assertEqual(10, page.limit)

     # Fold both spellings of the title into 'Council Member'.
     council_edit = {
         'from': ['Council Member', 'Councilmember'],
         'to': 'Council Member',
     }
     self.project.mass_edit('Office Title', [council_edit])
     self.assertInResponse('9')

     name_clusters = self.project.compute_clusters('Candidate Name')
     for group in name_clusters[0:3]:  # only the first few clusters
         for candidate in group:
             # {2}
             name = candidate['value']
             if not name.endswith(', '):
                 continue
             # Exactly one row is expected to carry this trailing-comma name.
             found = self.project.get_rows(
                 facet.TextFacet('Candidate Name', name))
             self.assertEqual(len(found.rows), 1)
             for hit in found.rows:
                 self.project.star_row(hit)
                 self.assertInResponse(str(hit.index + 1))

     # {5}, {6}, {7}
     starred = self.project.compute_facets(facet.StarredFacet(True))
     star_choices = starred.facets[0].choices
     self.assertEqual(len(star_choices), 2)  # true & false
     self.assertEqual(star_choices[True].count, 2)
     self.project.remove_rows()
     self.assertInResponse('2 rows')
Ejemplo n.º 3
0
 def test_facet(self):
     """Replay tutorial section "2. Exploration using Facets".

     Builds text and numeric facets on several columns, combines them in
     an engine, and checks the choice counts and filtered row counts that
     the server reports after each change.

     The ``{n}`` markers in the comments presumably refer to the numbered
     steps of the tutorial section being followed.
     """
     # Section "2. Exploration using Facets": {4}
     party_code_facet = facet.TextFacet(column='Party Code')
     response = self.project.compute_facets(party_code_facet)
     pc = response.facets[0]
     # Looking a facet up by index must give the same result as looking it
     # up by the facet object.
     self.assertEqual(pc, response.facets[party_code_facet])
     self.assertEqual(pc.name, 'Party Code')
     self.assertEqual(pc.choices['D'].count, 3700)
     self.assertEqual(pc.choices['N'].count, 15)
     self.assertEqual(pc.blank_choice.count, 1446)
     # {5}, {6} - combine the party facet with an ethnicity facet
     engine = facet.Engine(party_code_facet)
     ethnicity_facet = facet.TextFacet(column='Ethnicity')
     engine.add_facet(ethnicity_facet)
     self.project.engine = engine
     response = self.project.compute_facets()
     e = response.facets[ethnicity_facet]
     self.assertEqual(e.choices['B'].count, 1255)
     self.assertEqual(e.choices['W'].count, 4469)
     # {7} - select ethnicity 'B' and check the rows that come back
     ethnicity_facet.include('B')
     response = self.project.get_rows()
     self.assertEqual(response.filtered, 1255)
     indexes = [row.index for row in response.rows]
     self.assertEqual(indexes, [1, 2, 3, 4, 6, 12, 18, 26, 28, 32])
     # {8} - party counts now reflect the ethnicity selection
     response = self.project.compute_facets()
     pc = response.facets[party_code_facet]
     self.assertEqual(pc.name, 'Party Code')
     self.assertEqual(pc.choices['D'].count, 1179)
     self.assertEqual(pc.choices['R'].count, 11)
     self.assertEqual(pc.blank_choice.count, 46)
     # {9} - additionally select party 'R'
     party_code_facet.include('R')
     response = self.project.compute_facets()
     e = response.facets[ethnicity_facet]
     self.assertEqual(e.choices['B'].count, 11)
     # {10} - with both facets reset, all 6958 rows are expected back
     party_code_facet.reset()
     ethnicity_facet.reset()
     response = self.project.get_rows()
     self.assertEqual(response.filtered, 6958)
     # {11}
     office_title_facet = facet.TextFacet('Office Title')
     self.project.engine.add_facet(office_title_facet)
     response = self.project.compute_facets()
     # Look the facet up by object, like every other lookup in this test,
     # rather than by its position in the engine.
     self.assertEqual(len(response.facets[office_title_facet].choices), 76)
     # {12} - XXX not sure how to interpret bins & baseBins yet
     self.project.text_transform('Office Level', 'value.toNumber()')
     office_level_facet = facet.NumericFacet('Office Level')
     self.project.engine.add_facet(office_level_facet)
     # {13} - restrict Office Level to the range 300..320
     office_level_facet.From = 300   # 'from' is a reserved word
     office_level_facet.to = 320
     response = self.project.get_rows()
     self.assertEqual(response.filtered, 1907)
     response = self.project.compute_facets()
     ot = response.facets[office_title_facet]
     self.assertEqual(len(ot.choices), 21)
     # The same title occurs both with and without trailing spaces.
     self.assertEqual(ot.choices['Chief of Police'].count, 2)
     self.assertEqual(ot.choices['Chief of Police          '].count, 211)
     # {14} - after engine.remove_all() all 6958 rows are expected back
     self.project.engine.remove_all()
     response = self.project.get_rows()
     self.assertEqual(response.filtered, 6958)
     # {15} - text facet driven by an expression on the Phone column
     phone_facet = facet.TextFacet('Phone', expression='value[0, 3]')
     self.project.engine.add_facet(phone_facet)
     response = self.project.compute_facets()
     p = response.facets[phone_facet]
     self.assertEqual(p.expression, 'value[0, 3]')
     self.assertEqual(p.choices['318'].count, 2331)
     # {16} - numeric facet on the year part of Commissioned Date
     commissioned_date_facet = facet.NumericFacet(
         'Commissioned Date',
         expression='value.toDate().datePart("year")')
     self.project.engine.add_facet(commissioned_date_facet)
     response = self.project.compute_facets()
     cd = response.facets[commissioned_date_facet]
     self.assertEqual(cd.error_count, 959)
     self.assertEqual(cd.numeric_count, 5999)
     # {17} - numeric facet on a number matched out of Office Description
     office_description_facet = facet.NumericFacet(
         'Office Description',
         expression=r'value.match(/\D*(\d+)\w\w Rep.*/)[0].toNumber()')
     self.project.engine.add_facet(office_description_facet)
     response = self.project.compute_facets()
     od = response.facets[office_description_facet]
     self.assertEqual(od.min, 0)
     self.assertEqual(od.max, 110)
     self.assertEqual(od.numeric_count, 548)
Ejemplo n.º 4
0
 def test_transpose_fixed_number_of_rows_into_columns(self):
     """Replay the "Transpose Fixed Number of Rows into Columns" tutorial.

     Transposes every four cells of the single input column into columns,
     derives Transaction, Sender, Recipient and Amount columns from the
     text left in 'Column 1', and finally reorders the columns.  Several
     steps are conditional on the server version.

     The ``{n}`` markers in the comments presumably refer to the numbered
     steps of the tutorial section being followed.
     """
     # NOTE(review): on servers other than 2.0/2.1 the column is renamed
     # from 'Column 1' to 'Column' first - presumably because those
     # servers name the imported column differently; confirm.
     if self.server.version not in ('2.0', '2.1'):
         self.project.rename_column('Column 1', 'Column')
     # Section "5. Structural Editing,
     #             Transpose Fixed Number of Rows into Columns"
     # {1}
     self.assertTrue('Column' in self.project.column_order)
     # {8} - turn every 4 cells of 'Column' into one row of columns
     self.project.transpose_rows_into_columns('Column', 4)
     self.assertInResponse('Transpose every 4 cells in column Column')
     # {9} - renaming column triggers a bug in Refine <= 2.1
     if self.server.version not in ('2.0', '2.1'):
         self.project.rename_column('Column 2', 'Address')
         self.project.rename_column('Column 3', 'Address 2')
         self.project.rename_column('Column 4', 'Status')
     # {10} - classify each row as "send" or "receive" from 'Column 1'
     self.project.add_column(
         'Column 1', 'Transaction',
         'if(value.contains(" sent "), "send", "receive")')
     self.assertInResponse('Column 1 by filling 4 rows')
     # {11} - facet on Transaction, created with 'send' selected
     transaction_facet = facet.TextFacet(column='Transaction',
                                         selection='send')
     self.project.engine.add_facet(transaction_facet)
     self.project.compute_facets()
     # {12}, {13}, {14} - derive Sender, Recipient and Amount from
     # 'Column 1' using the wording of "sent" rows
     self.project.add_column(
         'Column 1', 'Sender',
         'value.partition(" sent ")[0]')
     # XXX resetting the facet shows data in rows with Transaction=receive
     #     which shouldn't have been possible with the facet.
     self.project.add_column(
         'Column 1', 'Recipient',
         'value.partition(" to ")[2].partition(" on ")[0]')
     self.project.add_column(
         'Column 1', 'Amount',
         'value.partition(" sent ")[2].partition(" to ")[0]')
     # {15} - clear the facet selection and select 'receive' instead;
     # reset() is chained, so it returns an object offering include()
     transaction_facet.reset().include('receive')
     # The rows returned here are not inspected (see the XXX note below).
     self.project.get_rows()
     # XXX there seems to be some kind of bug where the model doesn't
     #     match get_rows() output - cellIndex being returned that are
     #     out of range.
     # self.assertTrue(a_row['Sender'] is None)
     # self.assertTrue(a_row['Recipient'] is None)
     # self.assertTrue(a_row['Amount'] is None)
     # {16} - fill the three derived columns using the wording of
     # "received" rows; each transform is expected to report '2 cells'
     for column, expression in (
         ('Sender',
          'cells["Column 1"].value.partition(" from ")[2].partition(" on ")[0]'),
         ('Recipient',
          'cells["Column 1"].value.partition(" received ")[0]'),
         ('Amount',
          'cells["Column 1"].value.partition(" received ")[2].partition(" from ")[0]')
     ):
         self.project.text_transform(column, expression)
         self.assertInResponse('2 cells')
     # {17}
     transaction_facet.reset()
     # {18} - keep only the text after " on " in 'Column 1'
     self.project.text_transform('Column 1', 'value.partition(" on ")[2]')
     self.assertInResponse('4 cells')
     # {19}
     self.project.reorder_columns(['Transaction', 'Amount', 'Sender', 'Recipient'])
     self.assertInResponse('Reorder columns')
Ejemplo n.º 5
0
    def test_editing(self):
        """Replay tutorial sections 3 and 4 (cell editing, row deletion).

        Transforms and edits cell values, clusters 'Office Title', merges
        two spellings of 'Council Member', then stars and removes rows
        whose clustered candidate name ends in ', '.  Expected numbers
        differ between two groups of server versions; versions in neither
        group skip the version-specific assertions.
        """
        # The two groups of server versions that report different numbers.
        legacy_versions = ('2.0', '2.1', '2.5')
        modern_versions = ('2.8', '3.0-beta')

        # Section "3. Cell Editing": {1}
        self.project.engine.remove_all()    # redundant due to setUp
        # {2}
        self.project.text_transform(
            column='Zip Code 2', expression='value.toString()[0, 5]')
        if self.server.version in legacy_versions:
            self.assertInResponse('transform on 6067 cells in column Zip Code 2')
        elif self.server.version in modern_versions:
            self.assertInResponse('transform on 1441 cells in column Zip Code 2')
        # {3} - XXX history
        # {4}
        title_facet = facet.TextFacet('Office Title')
        self.project.engine.add_facet(title_facet)

        def title_choice_count():
            # Recompute the facets and count the Office Title choices.
            computed = self.project.compute_facets()
            return len(computed.facets[title_facet].choices)

        self.assertEqual(title_choice_count(), 76)
        self.project.text_transform('Office Title', 'value.trim()')
        self.assertInResponse('6895')
        self.assertEqual(title_choice_count(), 67)
        # {5}
        self.project.edit('Office Title', 'Councilmen', 'Councilman')
        self.assertInResponse('13')
        self.assertEqual(title_choice_count(), 66)
        # {6} - clustering with the default settings should find nothing
        default_clusters = self.project.compute_clusters('Office Title')
        self.assertTrue(not default_clusters)
        # {7} - 'knn' clustering is expected to find seven clusters
        knn_clusters = self.project.compute_clusters('Office Title', 'knn')
        self.assertEqual(len(knn_clusters), 7)
        leading = knn_clusters[0]
        self.assertEqual(len(leading), 2)
        if self.server.version in legacy_versions:
            self.assertEqual(leading[0]['value'], 'RSCC Member')
            self.assertEqual(leading[0]['count'], 233)
        elif self.server.version in modern_versions:
            self.assertEqual(leading[0]['value'], 'DPEC Member at Large')
            self.assertEqual(leading[0]['count'], 6)
        # Not strictly necessary to repeat 'Council Member' but a test
        # of mass_edit, and it's also what the front end sends.
        council_edit = {
            'from': ['Council Member', 'Councilmember'],
            'to': 'Council Member',
        }
        self.project.mass_edit('Office Title', [council_edit])
        self.assertInResponse('372')
        self.assertEqual(title_choice_count(), 65)

        # Section "4. Row and Column Editing, Batched Row Deletion"
        # Test doesn't strictly follow the tutorial as the "Browse this
        # cluster" performs a text facet which the server can't complete
        # as it busts its max facet count. The useful work is done with
        # get_rows(). Also, we can facet & select in one; the UI can't.
        # {1}, {2}, {3}, {4}
        name_clusters = self.project.compute_clusters('Candidate Name')
        for group in name_clusters[0:3]:   # just do a few
            for candidate in group:
                # {2}
                name = candidate['value']
                if not name.endswith(', '):
                    continue
                found = self.project.get_rows(
                    facet.TextFacet('Candidate Name', name))
                self.assertEqual(len(found.rows), 1)
                for hit in found.rows:
                    self.project.star_row(hit)
                    self.assertInResponse(str(hit.index + 1))
        # {5}, {6}, {7}
        starred = self.project.compute_facets(facet.StarredFacet(True))
        self.assertEqual(len(starred.facets[0].choices), 2)    # true & false
        if self.server.version in legacy_versions:
            self.assertEqual(starred.facets[0].choices[True].count, 3)
        elif self.server.version in modern_versions:
            self.assertEqual(starred.facets[0].choices[True].count, 2)
        self.project.remove_rows()
        if self.server.version in legacy_versions:
            self.assertInResponse('3 rows')
        elif self.server.version in modern_versions:
            self.assertInResponse('2 rows')