예제 #1
0
    def testGettersAndSetters(self):
        """Check that objects added to the dataset can be fetched by ID.

        An import, three topics (one nested as a child of another), two
        concepts, a slice, and a table are added.  Each getter is then
        expected to return the matching object, and the concept, slice, and
        table getters are expected to return None for IDs never added.
        """
        dataset = self.dspl_dataset

        # Populate the dataset.
        import1 = dspl_model.Import(namespace_id='import1',
                                    namespace_url='import1_url')
        dataset.AddImport(import1)

        nested_topic = dspl_model.Topic(topic_id='topic2')
        parent_topic = dspl_model.Topic(topic_id='topic1',
                                        children=[nested_topic])
        dataset.AddTopic(parent_topic)
        dataset.AddTopic(dspl_model.Topic(topic_id='topic3'))

        for concept_id in ('concept1', 'concept2'):
            dataset.AddConcept(dspl_model.Concept(concept_id=concept_id))

        dataset.AddSlice(dspl_model.Slice(slice_id='slice1'))
        dataset.AddTable(dspl_model.Table(table_id='table1'))

        # Lookups of IDs that were added, including the nested child topic.
        self.assertEqual(dataset.GetImport('import1').namespace_url,
                         'import1_url')

        for topic_id in ('topic1', 'topic2', 'topic3'):
            self.assertEqual(dataset.GetTopic(topic_id).topic_id, topic_id)

        self.assertEqual(dataset.GetConcept('concept2').concept_id,
                         'concept2')
        self.assertEqual(dataset.GetSlice('slice1').slice_id, 'slice1')
        self.assertEqual(dataset.GetTable('table1').table_id, 'table1')

        # Lookups of IDs that were never added.
        missing_lookups = (
            (dataset.GetConcept, 'concept3'),
            (dataset.GetSlice, 'slice3'),
            (dataset.GetTable, 'table3'),
        )
        for getter, missing_id in missing_lookups:
            self.assertEqual(getter(missing_id), None)
예제 #2
0
    def testDatasetXMLCreation(self):
        """Create dataset using models, then compare output to expected XML.

        The dataset is populated with imports, topics, concepts, a slice, and
        a table, then serialized with ToXMLElement().  The resulting elements
        are walked in parallel with the elements parsed from TEST_DSPL_XML,
        comparing tag names (after stripping any '{namespace}' prefix from
        the expected tag), attributes (skipped for 'dspl' and 'value' tags),
        and text (expected text is stripped, with empty text treated as
        None).
        """
        self.dspl_dataset.name = 'My Dataset'
        self.dspl_dataset.description = 'My Dataset Description'
        self.dspl_dataset.url = 'url1'

        self.dspl_dataset.provider_name = 'Googler'
        self.dspl_dataset.provider_url = 'url2'

        self.dspl_dataset.AddImport(
            dspl_model.Import(namespace_id='imported_namespace1',
                              namespace_url='http://imported_namespace1_url'))

        self.dspl_dataset.AddImport(
            dspl_model.Import(namespace_id='imported_namespace2',
                              namespace_url='http://imported_namespace2_url'))

        topic1 = dspl_model.Topic(topic_id='topic1', topic_name='topic1_name')
        topic2 = dspl_model.Topic(topic_id='topic2', topic_name='topic2_name')
        topic3 = dspl_model.Topic(topic_id='topic3', topic_name='topic3_name')
        topic4 = dspl_model.Topic(topic_id='topic4', topic_name='topic4_name')

        topic1.children = [topic2, topic3]

        self.dspl_dataset.AddTopic(topic1)
        self.dspl_dataset.AddTopic(topic4)

        self.dspl_dataset.AddConcept(
            dspl_model.Concept(
                concept_id='concept1',
                concept_name='Concept 1',
                concept_description='Concept 1 Description',
                table_ref='table2',
                concept_extension_reference='entity:entity',
                data_type='string',
                attributes=[
                    dspl_model.Attribute(concept_ref='attribute_concept',
                                         value='attribute_value')
                ],
                properties=[
                    dspl_model.Property(concept_ref='property_concept'),
                    dspl_model.Property(concept_ref='another_property_concept',
                                        is_parent=True)
                ]))

        self.dspl_dataset.AddConcept(
            dspl_model.Concept(concept_id='concept2',
                               concept_name='Concept 2',
                               concept_description='Concept 2 Description',
                               data_type='integer',
                               topic_references=['topic1', 'topic2']))

        self.dspl_dataset.AddConcept(
            dspl_model.Concept(concept_id='geo:country',
                               concept_reference='geo:country',
                               data_type='string'))

        self.dspl_dataset.AddSlice(
            dspl_model.Slice(slice_id='data_slice',
                             dimension_refs=['concept1', 'geo:country'],
                             metric_refs=['concept2'],
                             dimension_map={
                                 'concept1': 'concept_column1',
                                 'geo:country': 'concept_column3'
                             },
                             metric_map={'concept2': 'concept_column2'},
                             table_ref='table3'))

        self.dspl_dataset.AddTable(
            dspl_model.Table(
                table_id='table',
                columns=[
                    dspl_model.TableColumn('col1', 'string', '', ''),
                    dspl_model.TableColumn('col2', 'integer', '', '1234')
                ],
                file_name='mydata.csv',
                verbose=False))

        xml_output = self.dspl_dataset.ToXMLElement()
        expected_xml = xml.etree.ElementTree.fromstring(TEST_DSPL_XML)

        # izip_longest was renamed to zip_longest in Python 3; use whichever
        # this interpreter provides.
        zip_longest = getattr(itertools, 'zip_longest', None)
        if zip_longest is None:
            zip_longest = itertools.izip_longest

        # Captures the tag name that follows an optional '{namespace}' prefix.
        # Raw string: '\{' is not a valid escape in a normal string literal.
        tag_name_pattern = re.compile(r'^(?:\{.*\})?(.*)$')

        # Element.getiterator() was removed in Python 3.9; iter() is the
        # equivalent supported spelling (available since Python 2.7).
        for constructed_element, expected_element in zip_longest(
                xml_output.iter(), expected_xml.iter()):
            # zip_longest pads the shorter sequence with None, so a None on
            # either side means the two trees have different element counts.
            self.assertIsNotNone(
                constructed_element,
                'Constructed XML has fewer elements than expected XML')
            self.assertIsNotNone(
                expected_element,
                'Constructed XML has more elements than expected XML')

            # Compare tag names
            constructed_tag_name = constructed_element.tag

            # Remove namespace prefixes from expected tag
            expected_tag_name = tag_name_pattern.search(
                expected_element.tag).group(1)

            self.assertEqual(constructed_tag_name, expected_tag_name)

            # Compare tag attributes, ignoring these for dspl and value tags
            if constructed_element.tag not in ('dspl', 'value'):
                self.assertEqual(sorted(constructed_element.items()),
                                 sorted(expected_element.items()))

            # Compare tag text
            constructed_text = constructed_element.text
            expected_text = expected_element.text

            # Handle differences in how test and expected results handle text
            if expected_text:
                expected_text = expected_text.strip()

            if expected_text == '':
                expected_text = None

            self.assertEqual(constructed_text, expected_text)
예제 #3
0
def ElementTreeToDataset(element_tree, namespaces, csv_path, load_all_data):
    """Build a dspl_model.DataSet from a parsed DSPL XML tree.

    Args:
      element_tree: ElementTree.ElementTree object containing complete data
                    from DSPL XML file
      namespaces: A list of (namespace_id, namespace_url) tuples; entries
                  with an empty namespace_id are not added as imports
      csv_path: Directory where CSV files associated with dataset can be found
      load_all_data: Boolean indicating whether all CSV data should be loaded

    Returns:
      dspl_model.DataSet object
    """

    def _Qualified(local_name):
        """Return local_name with the DSPL schema prefix prepended."""
        return _DSPL_SCHEMA_PREFIX + local_name

    def _ChildElements(container_name, child_name):
        """Return child_name elements found under the named container.

        An empty list is returned when the container element is absent.
        """
        container = element_tree.find(_Qualified(container_name))
        if container is None:
            return []
        return container.findall(_Qualified(child_name))

    dataset = dspl_model.DataSet()

    # Basic info: target namespace, imports, dataset info, and provider.
    root = element_tree.getroot()
    dataset.namespace = root.get(_Qualified('targetNamespace'), default='')

    for namespace_id, namespace_url in namespaces:
        if not namespace_id:
            continue
        dataset.AddImport(
            dspl_model.Import(namespace_id=namespace_id,
                              namespace_url=namespace_url))

    info = element_tree.find(_Qualified('info'))
    if info is not None:
        dataset.name = _GetValue(info.find(_Qualified('name')))
        dataset.description = _GetValue(info.find(_Qualified('description')))
        dataset.url = _GetValue(info.find(_Qualified('url')))

    provider = element_tree.find(_Qualified('provider'))
    if provider is not None:
        dataset.provider_name = _GetValue(provider.find(_Qualified('name')))
        dataset.provider_url = _GetValue(provider.find(_Qualified('url')))

    # Topics, concepts, slices, and tables are converted in that order.
    # Slice conversion is handed the dataset built so far.
    for topic in _ChildElements('topics', 'topic'):
        dataset.AddTopic(ElementToTopic(topic))

    for concept in _ChildElements('concepts', 'concept'):
        dataset.AddConcept(ElementToConcept(concept))

    for data_slice in _ChildElements('slices', 'slice'):
        dataset.AddSlice(ElementToSlice(data_slice, dataset))

    for table in _ChildElements('tables', 'table'):
        dataset.AddTable(ElementToTable(table, csv_path, load_all_data))

    return dataset