def test_rep_html_table_struct_complex(self):
    """Check the id table rep_html_table_struct returns for the complex fixture.

    The expected grid has 15 rows of 10 entries. Repeated ids and the
    integer 0 entries are reproduced exactly as the test has always
    expected them.
    """
    def cell_ids(first, last):
        # 'td-<first>' .. 'td-<last>', both ends included
        return ['td-%d' % num for num in range(first, last + 1)]

    trailing_zeros = [0, 0, 0]
    expected_rows = [
        ['td-1'] + [0] * 9,
        ['td-2'] * 10,
        ['td-3', 'td-4', 'td-4', 'td-5', 'td-5', 'td-6', 'td-6'] + trailing_zeros,
        ['td-3'] + cell_ids(7, 12) + trailing_zeros,
        ['td-13'] * 10,
    ]
    # the remaining ten rows each hold seven consecutive ids (td-14 .. td-83)
    # followed by three 0 entries
    for first in range(14, 84, 7):
        expected_rows.append(cell_ids(first, first + 6) + trailing_zeros)

    # NOTE(review): read as an attribute, not called -- presumably provided by
    # the test case (property or setUp); confirm against the enclosing class.
    fixture_html = self.load_html_table_complex
    _data_table, _num_header_rows, id_table = rep_html_table_struct(fixture_html)
    self.assertEqual(id_table, expected_rows)
def test_rep_html_table_struct_simple(self):
    """Check the id table rep_html_table_struct returns for the simple fixture.

    The expected grid has 20 rows of 6 entries: one row of 'th-*' ids, one
    row that repeats 'td-1', and 18 rows of consecutive 'td-*' ids.
    """
    width = 6
    expected_rows = [
        ['th-%d' % num for num in range(1, width + 1)],
        ['td-1'] * width,
    ]
    # td-2 .. td-109, six consecutive ids per row
    for first in range(2, 110, width):
        expected_rows.append(['td-%d' % num for num in range(first, first + width)])

    # NOTE(review): read as an attribute, not called -- presumably provided by
    # the test case (property or setUp); confirm against the enclosing class.
    fixture_html = self.load_html_table_simple
    _data_table, _num_header_rows, id_table = rep_html_table_struct(fixture_html)
    self.assertEqual(id_table, expected_rows)
def find_data_vals_in_table(data_table_object):
    """Find the data values where neuron and ephys concept maps intersect.

    Parses a neuroelectro.models DataTable object for assigned
    NeuronConceptMaps and EphysConceptMaps and returns a dictionary of the
    data values found at the row and column intersections of these.

    Args:
        data_table_object: neuroelectro.models DataTable object with
            associated NeuronConceptMap and EphysConceptMap objects.

    Returns:
        (dict) return_dict: a dictionary keyed by the html tag id of each
            data cell in which a value was found. Every entry is a dict with
            the fields 'ncm_pk', 'ecm_pk', 'ref_text', 'data_value_dict' and
            'efcm_pk_list'. When several concept map pairs resolve to the
            same cell, the last pair processed wins. An empty dict is
            returned when the table has no neuron or no ephys concept maps,
            when the table could not be parsed, or when a TypeError is
            raised while processing.

    Example:
        >>> find_data_vals_in_table(data_table_ob)
        {'td-69': {'ncm_pk': 1L,
                   'ecm_pk': 1L,
                   'ref_text': u'463\xa0\xb1\xa089\xa0 (15)',
                   'data_value_dict': {'num_obs': 15,
                                       'max_range': None,
                                       'min_range': None,
                                       'value': 463.0,
                                       'error': 89.0},
                   'efcm_pk_list': []
                   }
         }
    """
    return_dict = dict()
    try:
        table_soup = BeautifulSoup(data_table_object.table_html, 'lxml')
        ds = m.DataSource.objects.get(data_table=data_table_object)
        ecm_obs = ds.ephysconceptmap_set.all()
        ncm_obs = ds.neuronconceptmap_set.all()
        efcm_obs = ds.expfactconceptmap_set.all()

        # nothing to intersect unless both ephys and neuron concept maps are assigned to the table
        if ecm_obs.count() == 0 or ncm_obs.count() == 0:
            return return_dict

        # flattened, parsed form of the table where data table cells can be
        # easily checked for associated concept maps
        data_table, _num_header_rows, html_tag_id_table = rep_html_table_struct(
            data_table_object.table_html)

        # if the data table or id table is None, parsing the table failed
        if data_table is None or html_tag_id_table is None:
            return dict()

        # The cells matched by an experimental factor concept map depend only
        # on the id table, so resolve them once instead of once per data cell.
        efcm_cells = []
        for efcm in efcm_obs:
            matching_efcm_cells = get_matching_inds(efcm.dt_id, html_tag_id_table)
            efcm_rows = set(e[0] for e in matching_efcm_cells)
            efcm_cols = set(e[1] for e in matching_efcm_cells)
            efcm_cells.append((efcm, efcm_rows, efcm_cols))

        for ncm in ncm_obs:
            # the same ncm may be linked to multiple data table value cells
            # due to rowspan/colspan issues
            matching_neuron_cells = get_matching_inds(ncm.dt_id, html_tag_id_table)

            for ecm in ecm_obs:
                ecm_html_tag_id = ecm.dt_id
                if ecm_html_tag_id == '-1' or len(html_tag_id_table) == 0:
                    continue

                # the same ecm may be linked to multiple data table cells
                matching_ephys_cells = get_matching_inds(ecm_html_tag_id, html_tag_id_table)

                # iterate through all matched cells, finding the data value
                # cell at each neuron/ephys intersection
                for c1 in matching_neuron_cells:
                    ncm_row_ind = c1[0]
                    ncm_col_ind = c1[1]
                    for c2 in matching_ephys_cells:
                        # data cells are usually to the right and bottom of
                        # header cells, hence the max of the two positions
                        table_cell_row_ind = max(ncm_row_ind, c2[0])
                        table_cell_col_ind = max(ncm_col_ind, c2[1])
                        table_cell_html_tag_id = html_tag_id_table[table_cell_row_ind][table_cell_col_ind]

                        data_tag = table_soup.find(id=table_cell_html_tag_id)
                        if data_tag is None:
                            continue
                        ref_text = data_tag.get_text()

                        # regex out the floating point values of data value string
                        data_dict = resolve_data_float(ref_text, True)
                        # NOTE(review): this truthiness test also skips a parsed
                        # value of 0 -- confirm that is intended.
                        if not data_dict['value']:
                            continue

                        # Always start from a fresh list so an entry can never
                        # pick up an unbound or stale list from an earlier cell.
                        efcm_pk_list = []
                        for efcm, efcm_rows, efcm_cols in efcm_cells:
                            # an efcm applies when it shares a row or a column with the data cell
                            if table_cell_row_ind not in efcm_rows and table_cell_col_ind not in efcm_cols:
                                continue
                            # a NumObs efcm fills in a missing num_obs instead
                            # of being added to the metadata list
                            if efcm.metadata.name == 'NumObs' and data_dict['num_obs'] is None:
                                data_dict['num_obs'] = efcm.metadata.cont_value.mean
                            else:
                                efcm_pk_list.append(efcm.pk)

                        return_dict[table_cell_html_tag_id] = {
                            'ncm_pk': ncm.pk,
                            'ecm_pk': ecm.pk,
                            'ref_text': ref_text,
                            'data_value_dict': data_dict,
                            'efcm_pk_list': efcm_pk_list,
                        }
        return return_dict
    except TypeError:
        return dict()