def test_web_scraping(self):
    # Replays tutorial section "6. Web Scraping" against self.project.
    # The {n} markers below are the numbered steps of that section.
    # assertInResponse is a helper defined outside this method; it is
    # presumably matching text from the most recent server response
    # (NOTE(review): confirm against its definition).
    project = self.project

    # {1}, {2} - split 'key' on ':' and name the two resulting columns,
    # then move 'line' to the end.
    project.split_column('key', separator=':')
    self.assertInResponse('Split 5409 cell(s) in column key')
    project.rename_column('key 1', 'page')
    self.assertInResponse('Rename column key 1 to page')
    project.rename_column('key 2', 'top')
    self.assertInResponse('Rename column key 2 to top')
    project.move_column('line', 'end')
    self.assertInResponse('Move column line to position 2')

    # {3} - sort on 'page' then 'top', both as numbers, and reorder rows.
    sort_criteria = [
        {'column': 'page', 'valueType': 'number'},
        {'column': 'top', 'valueType': 'number'},
    ]
    project.sorting = facet.Sorting(sort_criteria)
    project.reorder_rows()
    self.assertInResponse('Reorder rows')
    leading_row = project.get_rows(limit=1).rows[0]
    self.assertEqual(leading_row['page'], 1)
    self.assertEqual(leading_row['top'], 24)

    # {4} - text filter on 'line'; the second query changes the filter
    # in place and fetches rows without passing the facet again.
    text_filter = facet.TextFilterFacet('line', 'ahman')
    matching = project.get_rows(text_filter).rows
    self.assertEqual(len(matching), 1)
    self.assertEqual(matching[0]['top'], 106)
    text_filter.query = 'alvarez'
    matching = project.get_rows().rows
    self.assertEqual(len(matching), 2)
    self.assertEqual(matching[-1]['top'], 567)
    project.engine.remove_all()

    # {5} - tutorial says 'line'; it means 'top'
    top_range = facet.NumericFacet('top')
    top_range.to = 100
    project.remove_rows(top_range)
    self.assertInResponse('Remove 775 rows')
    # 'From' is capitalised on the facet (see its use below too).
    top_range.From = 570
    top_range.to = 600
    project.remove_rows(top_range)
    self.assertInResponse('Remove 71 rows')
    top_range.reset()
    remaining = project.get_rows()
    self.assertEqual(remaining.filtered, 4563)

    # {6} - select page 1; the selection is the number 1, not '1'.
    first_page = facet.TextFacet('page', 1)
    project.engine.add_facet(first_page)

    # {7}
    page_rows = project.get_rows().rows
    # Look for a row with a name in it by skipping HTML
    non_html_rows = [entry for entry in page_rows
                     if '<b>' not in entry['line']]
    name_row = non_html_rows[0]
    self.assertTrue('WELLNESS' in name_row['line'])
    self.assertEqual(name_row['top'], 161)
    top_range.From = 20
    top_range.to = 160
    project.remove_rows()
    self.assertInResponse('Remove 9 rows')
    project.engine.remove_all()

    # {8}
    project.text_transform('line', expression=self.filter_expr_1)
    self.assertInResponse('Text transform on 4554 cells in column line')
def test_transpose_fixed_number_of_rows_into_columns(self):
    # Section "5. Structural Editing,
    #          Transpose Fixed Number of Rows into Columns"
    # The {n} markers below are the numbered steps of that tutorial
    # section. assertInResponse is a helper defined outside this method;
    # it is presumably matching text from the most recent server response
    # (NOTE(review): confirm against its definition).

    # {1}
    self.assertTrue('Column' in self.project.column_order)

    # {8}
    self.project.transpose_rows_into_columns('Column', 4)
    self.assertInResponse('Transpose every 4 cells in column Column')

    # {9} - renaming column triggers a bug in Refine

    # {10} - derive a 'Transaction' column from the text of 'Column 1'.
    self.project.add_column(
        'Column 1', 'Transaction',
        'if(value.contains(" sent "), "send", "receive")')
    self.assertInResponse('Column 1 by filling 4 rows')

    # {11} - restrict to the rows whose Transaction is 'send'.
    transaction_facet = facet.TextFacet(column='Transaction',
                                        selection='send')
    self.project.engine.add_facet(transaction_facet)
    self.project.compute_facets()

    # {12}, {13}, {14} - extract sender / recipient / amount while the
    # 'send' selection is in place.
    self.project.add_column(
        'Column 1', 'Sender',
        'value.partition(" sent ")[0]')
    # XXX resetting the facet shows data in rows with Transaction=receive
    #     which shouldn't have been possible with the facet.
    self.project.add_column(
        'Column 1', 'Recipient',
        'value.partition(" to ")[2].partition(" on ")[0]')
    self.project.add_column(
        'Column 1', 'Amount',
        'value.partition(" sent ")[2].partition(" to ")[0]')

    # {15} - switch the selection from 'send' to 'receive'.
    transaction_facet.reset().include('receive')
    # The rows are still fetched, but the result is deliberately not
    # bound: the assertions that would read it are disabled below.
    self.project.get_rows()
    # XXX there seems to be some kind of bug where the model doesn't
    #     match get_rows() output - cellIndex being returned that are
    #     out of range.
    # Disabled assertions (note that 'a_row' is not defined anywhere in
    # this method):
    #self.assertTrue(a_row['Sender'] is None)
    #self.assertTrue(a_row['Recipient'] is None)
    #self.assertTrue(a_row['Amount'] is None)

    # {16} - fill the same three columns for the 'receive' rows, reading
    # from 'Column 1' via the cells[...] accessor.
    receive_transforms = (
        ('Sender',
         'cells["Column 1"].value.partition(" from ")[2]'
         '.partition(" on ")[0]'),
        ('Recipient',
         'cells["Column 1"].value.partition(" received ")[0]'),
        ('Amount',
         'cells["Column 1"].value.partition(" received ")[2]'
         '.partition(" from ")[0]'),
    )
    for column, expression in receive_transforms:
        self.project.text_transform(column, expression)
        self.assertInResponse('2 cells')

    # {17}
    transaction_facet.reset()

    # {18} - keep only the part of 'Column 1' after " on ".
    self.project.text_transform('Column 1', 'value.partition(" on ")[2]')
    self.assertInResponse('4 cells')

    # {19}
    self.project.reorder_columns(
        ['Transaction', 'Amount', 'Sender', 'Recipient'])
    self.assertInResponse('Reorder columns')
def test_facet(self):
    # Replays tutorial section "2. Exploration using Facets" against
    # self.project. The {n} markers are the numbered steps of that
    # section.
    project = self.project

    # {4} - a single text facet on 'Party Code'.
    party_code_facet = facet.TextFacet(column='Party Code')
    computed = project.compute_facets(party_code_facet)
    party_result = computed.facets[0]
    # test look by index same as look up by facet object
    self.assertEqual(party_result, computed.facets[party_code_facet])
    self.assertEqual(party_result.name, 'Party Code')
    self.assertEqual(party_result.choices['D'].count, 3700)
    self.assertEqual(party_result.choices['N'].count, 15)
    self.assertEqual(party_result.blank_choice.count, 1446)

    # {5}, {6} - install an engine holding both the party and the
    # ethnicity facet on the project.
    two_facet_engine = facet.Engine(party_code_facet)
    ethnicity_facet = facet.TextFacet(column='Ethnicity')
    two_facet_engine.add_facet(ethnicity_facet)
    project.engine = two_facet_engine
    computed = project.compute_facets()
    ethnicity_result = computed.facets[ethnicity_facet]
    self.assertEqual(ethnicity_result.choices['B'].count, 1255)
    self.assertEqual(ethnicity_result.choices['W'].count, 4469)

    # {7} - select ethnicity 'B' and check the rows that come back.
    ethnicity_facet.include('B')
    fetched = project.get_rows()
    self.assertEqual(fetched.filtered, 1255)
    row_indexes = [entry.index for entry in fetched.rows]
    self.assertEqual(row_indexes, [1, 2, 3, 4, 6, 12, 18, 26, 28, 32])

    # {8} - party counts with the ethnicity selection in place.
    computed = project.compute_facets()
    party_result = computed.facets[party_code_facet]
    self.assertEqual(party_result.name, 'Party Code')
    self.assertEqual(party_result.choices['D'].count, 1179)
    self.assertEqual(party_result.choices['R'].count, 11)
    self.assertEqual(party_result.blank_choice.count, 46)

    # {9}
    party_code_facet.include('R')
    computed = project.compute_facets()
    ethnicity_result = computed.facets[ethnicity_facet]
    self.assertEqual(ethnicity_result.choices['B'].count, 11)

    # {10} - clear both selections.
    party_code_facet.reset()
    ethnicity_facet.reset()
    fetched = project.get_rows()
    self.assertEqual(fetched.filtered, 6958)

    # {11} - third facet on the engine, looked up here by position.
    office_title_facet = facet.TextFacet('Office Title')
    project.engine.add_facet(office_title_facet)
    computed = project.compute_facets()
    self.assertEqual(len(computed.facets[2].choices), 76)

    # {12} - XXX not sure how to interpret bins & baseBins yet
    office_level_facet = facet.NumericFacet('Office Level')
    project.engine.add_facet(office_level_facet)

    # {13}
    # The attribute is spelled 'From' because 'from' is a reserved word.
    office_level_facet.From = 300
    office_level_facet.to = 320
    fetched = project.get_rows()
    self.assertEqual(fetched.filtered, 1907)
    computed = project.compute_facets()
    title_result = computed.facets[office_title_facet]
    self.assertEqual(len(title_result.choices), 21)
    # The two keys below differ only by a trailing space.
    self.assertEqual(title_result.choices['Chief of Police'].count, 2)
    self.assertEqual(title_result.choices['Chief of Police '].count, 211)

    # {14}
    project.engine.remove_all()
    fetched = project.get_rows()
    self.assertEqual(fetched.filtered, 6958)

    # {15} - text facet on an expression rather than the raw value.
    phone_expr = 'value[0, 3]'
    phone_facet = facet.TextFacet('Phone', expression=phone_expr)
    project.engine.add_facet(phone_facet)
    computed = project.compute_facets()
    phone_result = computed.facets[phone_facet]
    self.assertEqual(phone_result.expression, phone_expr)
    self.assertEqual(phone_result.choices['318'].count, 2331)

    # {16}
    year_expr = 'value.toDate().datePart("year")'
    commissioned_date_facet = facet.NumericFacet(
        'Commissioned Date', expression=year_expr)
    project.engine.add_facet(commissioned_date_facet)
    computed = project.compute_facets()
    commissioned_result = computed.facets[commissioned_date_facet]
    self.assertEqual(commissioned_result.error_count, 959)
    self.assertEqual(commissioned_result.numeric_count, 5999)

    # {17}
    district_expr = r'value.match(/\D*(\d+)\w\w Rep.*/)[0].toNumber()'
    office_description_facet = facet.NumericFacet(
        'Office Description', expression=district_expr)
    project.engine.add_facet(office_description_facet)
    computed = project.compute_facets()
    description_result = computed.facets[office_description_facet]
    self.assertEqual(description_result.min, 0)
    self.assertEqual(description_result.max, 110)
    self.assertEqual(description_result.numeric_count, 548)
def test_editing(self):
    # Replays tutorial section "3. Cell Editing" and then the "Batched
    # Row Deletion" part of section 4 against self.project. The {n}
    # markers are the numbered steps of those sections.
    # assertInResponse is a helper defined outside this method; it is
    # presumably matching text from the most recent server response
    # (NOTE(review): confirm against its definition).
    project = self.project

    # Section "3. Cell Editing": {1}
    project.engine.remove_all()    # redundant due to setUp

    # {2}
    project.text_transform(column='Zip Code 2',
                           expression='value.toString()[0, 5]')
    self.assertInResponse('transform on 6067 cells in column Zip Code 2')

    # {3} - XXX history

    # {4}
    office_title_facet = facet.TextFacet('Office Title')
    project.engine.add_facet(office_title_facet)

    def office_title_choice_count():
        # Recompute the facets and count the distinct Office Title
        # choices currently reported.
        computed = project.compute_facets()
        return len(computed.facets[office_title_facet].choices)

    self.assertEqual(office_title_choice_count(), 76)
    project.text_transform('Office Title', 'value.trim()')
    self.assertInResponse('6895')
    self.assertEqual(office_title_choice_count(), 67)

    # {5}
    project.edit('Office Title', 'Councilmen', 'Councilman')
    self.assertInResponse('13')
    self.assertEqual(office_title_choice_count(), 66)

    # {6} - clustering with no method argument yields nothing truthy.
    default_clusters = project.compute_clusters('Office Title')
    self.assertTrue(not default_clusters)

    # {7}
    knn_clusters = project.compute_clusters('Office Title', 'knn')
    self.assertEqual(len(knn_clusters), 7)
    leading_cluster = knn_clusters[0]
    self.assertEqual(len(leading_cluster), 2)
    leading_member = leading_cluster[0]
    self.assertEqual(leading_member['value'], 'RSCC Member')
    self.assertEqual(leading_member['count'], 233)
    # Not strictly necessary to repeat 'Council Member' but a test
    # of mass_edit, and it's also what the front end sends.
    council_merge = {
        'from': ['Council Member', 'Councilmember'],
        'to': 'Council Member'
    }
    project.mass_edit('Office Title', [council_merge])
    self.assertInResponse('372')
    self.assertEqual(office_title_choice_count(), 65)

    # Section "4. Row and Column Editing, Batched Row Deletion"
    # Test doesn't strictly follow the tutorial as the "Browse this
    # cluster" performs a text facet which the server can't complete
    # as it busts its max facet count. The useful work is done with
    # get_rows(). Also, we can facet & select in one; the UI can't.
    # {1}, {2}, {3}, {4}
    name_clusters = project.compute_clusters('Candidate Name')
    for cluster in name_clusters[:3]:   # just do a few
        for candidate in cluster:
            # {2} - only values ending in ', ' are starred.
            if not candidate['value'].endswith(', '):
                continue
            name_facet = facet.TextFacet('Candidate Name',
                                         candidate['value'])
            found = project.get_rows(name_facet)
            self.assertEqual(len(found.rows), 1)
            for row in found.rows:
                project.star_row(row)
                self.assertInResponse(str(row.index + 1))

    # {5}, {6}, {7} - facet on the star flag, then remove those rows.
    starred = project.compute_facets(facet.StarredFacet(True))
    self.assertEqual(len(starred.facets[0].choices), 2)    # true & false
    self.assertEqual(starred.facets[0].choices[True].count, 3)
    project.remove_rows()
    self.assertInResponse('3 rows')