Esempio n. 1
0
 def test_get_paragraphs_between(self):
     # The paragraphs lying between the 'Main items' and 'Other stuff'
     # markers must come back as exactly this list, in this order.
     expected = ['Very large tomatoes', 'Huge apricots', 'Mediocre marrows']

     all_paragraphs = parsedoc.parse_docx_paragraphs(self.rawxml)
     between = parsedoc.get_paragraphs_between(
         all_paragraphs, 'Main items', 'Other stuff')

     self.assertEqual(expected, between)
Esempio n. 2
0
    def read_variables(self, startmarker, endmarker):
        """Read variables table from document and return as list of VariableSpecification objects.

        The paragraphs selected by ``parsedoc.get_paragraphs_between`` for the
        given markers are scanned in order:

        * A paragraph starting with ``'Variable: '`` declares a new variable.
          It is split on single spaces; the second field is the variable name
          and the third field is looked up in ``VARIABLE_TYPES``.
        * Any other non-empty paragraph is treated as ``key: value`` metadata
          and attached to the most recently declared variable.  Everything
          after the first ``': '`` is the value.  A value beginning with ``[``
          is parsed with ``ast.literal_eval``.
        * Empty paragraphs are skipped.

        Returns:
            List of the variable objects created, in document order.

        Raises:
            ValueError: if a metadata paragraph appears before any
                ``'Variable: '`` paragraph (there is nothing to attach it to).
        """

        # get relevant paragraphs
        paragraphs_variables = parsedoc.get_paragraphs_between(
            self.paragraphs_all, startmarker, endmarker)

        # build list
        variables = []
        current_variable = None
        for paragraph in paragraphs_variables:
            if paragraph.startswith('Variable: '):
                fields = paragraph.split(' ')
                name = fields[1]
                dtype = VARIABLE_TYPES[fields[2]]
                current_variable = VariableSpecification(name, dtype)
                variables.append(current_variable)
            elif paragraph:
                # Without this guard the failure would be an opaque
                # AttributeError on None; report the offending text instead.
                if current_variable is None:
                    raise ValueError(
                        'Metadata found before any variable declaration: '
                        '{0!r}'.format(paragraph))
                # Split at the first ': ' only, so values may contain ': '.
                key, _, value = paragraph.partition(': ')
                # allow arrays of integers as needed for flag meanings
                if value.startswith('['):
                    value = ast.literal_eval(value)
                current_variable.add_metadata(key, value)

        # return result
        return variables
Esempio n. 3
0
    def read_name_list(self, startmarker, endmarker):
        """Return the entries of a comma-separated list found in the document.

        Only the first paragraph selected by the two markers is used: it is
        stripped of surrounding whitespace and then split on ``', '``.
        """

        # Paragraphs selected by the two markers
        candidates = parsedoc.get_paragraphs_between(
            self.paragraphs_all, startmarker, endmarker)

        # The list is the first such paragraph
        first_line = candidates[0]
        trimmed = first_line.strip()
        return trimmed.split(', ')
Esempio n. 4
0
 def test_get_paragraphs_containing(self):
     # Looking up 'Measurement:' for the items listed between the two
     # markers must produce exactly this item -> paragraph mapping.
     expected = {
         'Very large tomatoes': 'Measurement: 20cm',
         'Huge apricots': 'Measurement: 15cm',
         'Mediocre marrows': 'Measurement: 12cm'
     }

     all_paragraphs = parsedoc.parse_docx_paragraphs(self.rawxml)
     item_names = parsedoc.get_paragraphs_between(
         all_paragraphs, 'Main items', 'Other stuff')
     found = parsedoc.get_paragraphs_containing(
         all_paragraphs, item_names, 'Measurement:')

     self.assertEqual(expected, found)
Esempio n. 5
0
def get_dataset_list():
    """Parse and return expected array of descriptors per dataset.

    The document at ``DELIVERABLE_D4_3_RELATIVE_PATH`` (relative to
    ``eustaceconfig.SYSTEM_PATH``) is opened in binary mode and parsed into
    paragraphs.  Dataset names are the paragraphs selected by
    ``DATASET_CONTENTS_START`` / ``DATASET_CONTENTS_END``; each name is then
    paired with the result of looking it up in the output of
    ``get_paragraphs_containing`` for ``eustaceconfig.WORKSPACE_PATH``.

    Returns:
        List with one ``CatalogueDataSet(name, path)`` per dataset name, in
        document order.  ``path`` is whatever ``paths.get(name)`` yields
        (presumably ``None`` when no matching paragraph exists - confirm
        against ``get_paragraphs_containing``).
    """
    pathname = os.path.normpath(
        os.path.join(eustaceconfig.SYSTEM_PATH,
                     DELIVERABLE_D4_3_RELATIVE_PATH))

    # Use a context manager so the file handle is always released, even if
    # extraction or parsing raises.  Parsing stays inside the block in case
    # the extracted XML still depends on the open file.
    with open(pathname, 'rb') as inputfile:
        rawxml = retrieve_document_xml(inputfile)
        paragraphs = parse_docx_paragraphs(rawxml, ignore_trailing_digits=True)

    names = get_paragraphs_between(paragraphs, DATASET_CONTENTS_START,
                                   DATASET_CONTENTS_END)
    paths = get_paragraphs_containing(paragraphs, names,
                                      eustaceconfig.WORKSPACE_PATH)
    return [CatalogueDataSet(name, paths.get(name)) for name in names]
Esempio n. 6
0
    def read_attributes(self, startmarker, endmarker):
        """Read attributes table from document and return as dictionary.

        Each paragraph selected by the two markers is expected to have the
        form ``key: value``.  Only the first ``': '`` acts as the separator,
        so a value that itself contains ``': '`` is kept intact.  If the same
        key occurs more than once, the last occurrence wins.

        Returns:
            Dictionary mapping attribute name to attribute value (both
            strings).

        Raises:
            IndexError: if a paragraph does not contain ``': '``.
        """

        # get relevant paragraphs
        paragraphs_attributes = parsedoc.get_paragraphs_between(
            self.paragraphs_all, startmarker, endmarker)

        # parse attributes table
        attributes = {}
        for paragraph in paragraphs_attributes:
            # maxsplit=1: an unbounded split would drop everything after a
            # second ': ' inside the value.
            column = paragraph.split(': ', 1)
            key = column[0]
            value = column[1]
            attributes[key] = value

        # return result
        return attributes
Esempio n. 7
0
    def read_dimensions(self, startmarker, endmarker):
        """Return the required dimensions listed in the document.

        The result maps each dimension name to its required integer size, or
        to None when the size equals DIMENSION_UNLIMITED.
        """

        # Paragraphs selected by the two markers
        section = parsedoc.get_paragraphs_between(
            self.paragraphs_all, startmarker, endmarker)

        # The first paragraph of the section is not a dimension entry and is
        # skipped; every remaining one is split on ': ' into name and size.
        sizes = {}
        for entry in section[1:]:
            parts = entry.split(': ')
            name, size_text = parts[0], parts[1]
            sizes[name] = (
                None if size_text == DIMENSION_UNLIMITED else int(size_text))

        return sizes