Exemple #1
0
 def test_cell_values_with_long_name(self):
     """Three long, underscore-heavy names separated by commas come back as three entries."""
     expected_names = [
         'B_subtilis_WT_JH642_Colony_1',
         'B_subtilis_WT_JH642_Colony_2',
         'B_subtilis_WT_JH642_Colony_3',
     ]
     # Adjacent literals are joined at compile time into the single cell string.
     cell_text = ('B_subtilis_WT_JH642_Colony_1, '
                  'B_subtilis_WT_JH642_Colony_2, '
                  'B_subtilis_WT_JH642_Colony_3')
     actual_names = tu.extract_name_value(cell_text)
     self.assertListEqual(expected_names, actual_names)
Exemple #2
0
 def _process_control_with_captions(self, cell, control_tables):
     """Collect the controls stored under every table caption named in a cell.

     Each name extracted from the cell text is canonicalized by lower-casing
     it and removing all whitespace before it is looked up. Names that have
     no entry in ``control_tables`` are skipped.

     Args:
         cell: object whose ``get_text()`` supplies the cell text.
         control_tables: lookup from canonicalized caption to an iterable
             of controls.

     Returns:
         A new list holding the controls of every matching caption, in the
         order the captions were extracted.
     """
     matched_controls = []
     captions = table_utils.extract_name_value(cell.get_text())
     for caption in captions:
         caption_key = ''.join(caption.lower().split())
         if caption_key not in control_tables:
             continue
         matched_controls.extend(control_tables[caption_key])
     return matched_controls
Exemple #3
0
 def _process_reagent_media(self, cell, header_cell):
     """Convert one reagent/media cell into a list of entry dictionaries.

     The header cell is parsed first to obtain the reagent name and an
     optional timepoint. The cell text is then handled in one of three ways:

     * valued cell: one ``{'name', 'value', 'unit'}`` entry per
       ``(value, unit)`` pair produced by ``table_utils.transform_cell``;
     * bare number: a validation error is recorded and an empty list is
       returned;
     * anything else: one ``{'name', 'value'}`` entry per extracted name.

     When the header yields a truthy timepoint, every entry additionally
     carries it under the ``'timepoint'`` key.

     Args:
         cell: object whose ``get_text()`` supplies the cell text.
         header_cell: header cell handed to ``process_reagent_header``.

     Returns:
         A list of entry dictionaries (possibly empty).
     """
     cell_text = cell.get_text()
     name_dict, timepoint_dict = cell_parser.PARSER.process_reagent_header(
         header_cell, self._timepoint_units, unit_type='timepoints')

     def with_timepoint(entry):
         # The 'timepoint' key is added last and only when the header gave one.
         if timepoint_dict:
             entry['timepoint'] = timepoint_dict
         return entry

     if table_utils.is_valued_cells(cell_text):
         entries = []
         try:
             for value, unit in table_utils.transform_cell(
                     cell_text, self._fluid_units, cell_type='fluid'):
                 entries.append(with_timepoint(
                     {'name': name_dict, 'value': value, 'unit': unit}))
         except TableException as err:
             # Entries built before the failure are kept; the error is recorded.
             self._validation_errors.append(err.get_message())
         return entries

     if table_utils.is_number(cell_text):
         reason = '%s is missing a unit' % cell_text
         self._validation_errors.append(
             'Measurement table has invalid reagent/media value: %s' % reason)
         return []

     return [
         with_timepoint({'name': name_dict, 'value': name})
         for name in table_utils.extract_name_value(cell_text)
     ]
Exemple #4
0
 def parse_content_item(self, cell, fluid_units=None, timepoint_units=None):
     """Parse a content cell into a list of content dictionaries.

     The cell text is tokenized and classified, then handled by type:

     * ``NAME_VALUE_UNIT_TIMEPOINT``: one dict with ``'name'``, ``'value'``,
       ``'unit'`` and ``'timepoints'``;
     * ``NAME_VALUE_UNIT``: one dict with ``'name'``, ``'value'`` and
       ``'unit'``;
     * ``NAME``: one ``{'name': ...}`` dict per name extracted from the cell.

     Args:
         cell: object providing ``get_text()`` and ``get_text_with_url()``.
         fluid_units: fluid unit lookup forwarded to
             ``process_content_item_unit``; defaults to an empty dict.
         timepoint_units: timepoint unit lookup forwarded to
             ``process_content_item_unit`` and ``process_timepoint``;
             defaults to an empty dict.

     Returns:
         A list of content dictionaries.

     Raises:
         TableException: if the cell yields no tokens, or its token type is
             none of the three handled above.
     """
     # Use a fresh dict per call instead of a shared mutable default, so a
     # helper that mutates its argument cannot leak state between calls.
     if fluid_units is None:
         fluid_units = {}
     if timepoint_units is None:
         timepoint_units = {}

     tokens = self._cell_tokenizer.tokenize(cell.get_text(),
                                            keep_skip=False)
     if len(tokens) < 1:
         raise TableException('Invalid value: %s does not contain a name' %
                              cell.get_text())

     cell_type = self._get_token_type(self._cell_parser.parse(tokens))
     list_of_contents = []
     if cell_type == 'NAME_VALUE_UNIT_TIMEPOINT':
         label, value, unit, timepoint_value, timepoint_unit = (
             self._get_name_values_unit_timepoint(tokens))
         content = {
             'name': self.process_name_with_uri(
                 label, cell.get_text_with_url()),
             'value': value,
             'unit': self.process_content_item_unit(
                 unit, fluid_units, timepoint_units),
             'timepoints': self.process_timepoint(
                 timepoint_value, timepoint_unit, timepoint_units),
         }
         list_of_contents.append(content)
     elif cell_type == 'NAME_VALUE_UNIT':
         label, value, unit = self._get_name_values_unit(tokens)
         content = {
             'name': self.process_name_with_uri(
                 label, cell.get_text_with_url()),
             'value': value,
             'unit': self.process_content_item_unit(
                 unit, fluid_units, timepoint_units),
         }
         list_of_contents.append(content)
     elif cell_type == 'NAME':
         for label in table_utils.extract_name_value(cell.get_text()):
             list_of_contents.append({
                 'name': self.process_name_with_uri(
                     label, cell.get_text_with_url()),
             })
     else:
         raise TableException('Unable to parse %s' % cell.get_text())
     return list_of_contents
Exemple #5
0
 def _process_file_type(self, cell):
     """Return the names extracted from a file-type cell's text as a new list."""
     names = table_utils.extract_name_value(cell.get_text())
     return list(names)
Exemple #6
0
 def test_cell_with_unicode_characters(self):
     """A cell whose text starts with a vertical-tab character (0x0B) still yields 'App'.

     The earlier form called ``assertTrue('App', result)``. That checks the
     truthiness of the constant ``'App'`` and passes the extracted result as
     the failure message, so it could never fail. ``assertIn`` actually
     inspects the extracted values.
     """
     cell_str = '\x0bApp'
     # NOTE(review): assumes the extracted names are returned with the
     # control character removed; confirm against extract_name_value.
     self.assertIn('App', tu.extract_name_value(cell_str))
Exemple #7
0
 def test_cell_with_trailing_whitespace(self):
     """Every name extracted from a cell with a trailing space is one of the expected names."""
     allowed_names = ['Yeast1_', 'Yeast2_', 'Yeast3_']
     extracted_names = tu.extract_name_value('Yeast1_, Yeast2_, Yeast3_ ')
     for extracted_name in extracted_names:
         self.assertTrue(extracted_name in allowed_names)
Exemple #8
0
 def test_cell_values_with_named_and_numerical_spacing(self):
     """A name mixing words, a number and spaces is extracted as one unsplit value."""
     cell_str = 'B. subtilis 168 PmtlA-comKS'
     # NOTE(review): an empty result would skip the loop and pass silently.
     for name in tu.extract_name_value(cell_str):
         # assertEqual replaces the assertEquals alias, which newer unittest
         # versions no longer provide.
         self.assertEqual(cell_str, name)
Exemple #9
0
 def test_cell_values_with_named_spacing(self):
     """A name containing spaces, dots and parentheses is extracted as one unsplit value."""
     cell_str = 'Yeast_Extract_Peptone_Adenine_Dextrose (a.k.a. YPAD Media)'
     # NOTE(review): an empty result would skip the loop and pass silently.
     for name in tu.extract_name_value(cell_str):
         # assertEqual replaces the assertEquals alias, which newer unittest
         # versions no longer provide.
         self.assertEqual(cell_str, name)
Exemple #10
0
 def test_cell_values_with_one_name(self):
     """A cell holding a single name yields a one-element list with that name."""
     actual_names = tu.extract_name_value('CSV')
     self.assertListEqual(['CSV'], actual_names)
Exemple #11
0
 def test_cell_values_without_underscore(self):
     """Two comma-separated names without underscores are returned in order."""
     actual_names = tu.extract_name_value('CSV, FCS')
     self.assertListEqual(['CSV', 'FCS'], actual_names)
Exemple #12
0
 def test_cell_values_with_name_containing_underscore_numbers(self):
     """Names ending in an underscore followed by digits are kept whole."""
     actual_names = tu.extract_name_value('AND_00, NAND_00')
     self.assertListEqual(['AND_00', 'NAND_00'], actual_names)