Example #1
 def test_transform(self):
     with job_config('task_1'):
         o = ops.read_dataset(read_dataset_processor=None,
                              dataset_info=DatasetMeta(name='dataset'))
         t = ops.transform(input=o, transform_processor=None)
         ops.write_dataset(input=t,
                           dataset_info=DatasetMeta(name='dataset'))
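     # read -> transform -> write: three nodes chained by two data edges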
     self.assertEqual(3, len(current_graph().nodes))
     self.assertEqual(2, len(current_graph().edges))
Example #2
 def test_predict(self):
     with job_config('task_1'):
         o = ops.read_dataset(read_dataset_processor=None,
                              dataset_info=DatasetMeta(name='dataset'))
         t = ops.predict(input=o,
                         prediction_processor=None,
                         model_info=ModelMeta(name='model'),
                         name='a')
         ops.write_dataset(input=t,
                           dataset_info=DatasetMeta(name='dataset'))
     self.assertEqual(3, len(current_graph().nodes))
     self.assertEqual(2, len(current_graph().edges))
     n = self.get_node_by_name('a')
     self.assertEqual('model', n.node_config.get('model_info').name)
Example #3
 def test_save_datasets_list_datasets(self):
     schema = Schema(name_list=['a'],
                     type_list=[DataType.STRING])
     dataset_1 = DatasetMeta(name='dataset1', data_format='csv',
                             properties=Properties({'a': 'b'}), schema=schema)
     dataset_2 = DatasetMeta(name='dataset2')
     response = self.store.register_datasets([dataset_1, dataset_2])
     self.assertEqual(len(response), 2)
     self.assertEqual(1, response[0].uuid)
     self.assertEqual(2, response[1].uuid)
     response_list = self.store.list_datasets(2, 0)  # first page of two results, no offset
     self.assertEqual(2, len(response_list))
     self.assertEqual('dataset1', response_list[0].name)
     self.assertEqual('dataset2', response_list[1].name)
Example #4
 def test_read_write_dataset(self):
     with job_config('task_1'):
         o = ops.read_dataset(read_dataset_processor=None,
                              dataset_info=DatasetMeta(name='source'))
         ops.write_dataset(input=o, dataset_info=DatasetMeta(name='sink'))
     self.assertEqual(2, len(current_graph().nodes))
     self.assertEqual(1, len(current_graph().edges))
     node_list = list(current_graph().nodes.values())
     for node in node_list:
         if isinstance(node, ReadDatasetNode):
             self.assertEqual('source',
                              node.node_config.get('dataset').name)
         elif isinstance(node, WriteDatasetNode):
             self.assertEqual('sink', node.node_config.get('dataset').name)
         self.assertEqual('mock', node.config.job_type)
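These tests rely on job_config attaching a JobConfig to every op created inside
the with block, which is why each node in Example #4 reports a job_type of
'mock'. Below is a minimal sketch of how such a context manager could behave;
it is an illustration under assumptions, not the library's actual
implementation, and the module-level _current_job_config holder is invented
here (JobConfig is taken from the examples that follow):

from contextlib import contextmanager

_current_job_config = None  # invented holder for the active config

@contextmanager
def job_config(job_name, job_type='mock'):
    # Ops created inside the block would read _current_job_config and attach
    # it to the nodes they add to the current graph; the 'mock' default
    # mirrors the job_type asserted in Example #4.
    global _current_job_config
    _current_job_config = JobConfig(job_name=job_name, job_type=job_type)
    try:
        yield _current_job_config
    finally:
        _current_job_config = None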
Example #5
def transform_dataset_meta(dataset_proto):
    properties = dataset_proto.properties
    if properties == {}:
        properties = None
    name_list = dataset_proto.schema.name_list
    type_list = dataset_proto.schema.type_list
    if not name_list:
        name_list = None
    if not type_list:
        data_type_list = None
    else:
        data_type_list = []
        for c in type_list:
            data_type_list.append(DataType(DataTypeProto.Name(c)))
    schema = Schema(name_list=name_list, type_list=data_type_list)
    return DatasetMeta(
        name=dataset_proto.name,
        data_format=dataset_proto.data_format.value if dataset_proto.HasField('data_format') else None,
        description=dataset_proto.description.value if dataset_proto.HasField('description') else None,
        uri=dataset_proto.uri.value if dataset_proto.HasField('uri') else None,
        create_time=dataset_proto.create_time.value if dataset_proto.HasField('create_time') else None,
        update_time=dataset_proto.update_time.value if dataset_proto.HasField('update_time') else None,
        properties=properties,
        schema=schema,
        catalog_name=dataset_proto.catalog_name.value if dataset_proto.HasField('catalog_name') else None,
        catalog_type=dataset_proto.catalog_type.value if dataset_proto.HasField('catalog_type') else None,
        catalog_database=dataset_proto.catalog_database.value if dataset_proto.HasField('catalog_database') else None,
        catalog_connection_uri=dataset_proto.catalog_connection_uri.value if dataset_proto.HasField('catalog_connection_uri') else None,
        catalog_table=dataset_proto.catalog_table.value if dataset_proto.HasField('catalog_table') else None)
Example #6
def result_to_dataset_meta(dataset_result) -> DatasetMeta:
    properties = dataset_result.properties
    if properties is not None:
        properties = ast.literal_eval(properties)
    name_list = dataset_result.name_list
    if name_list is not None:
        name_list = ast.literal_eval(name_list)
    type_list = dataset_result.type_list
    if type_list is not None:
        type_list = ast.literal_eval(type_list)
        data_type_list = []
        for data_type in type_list:
            data_type_list.append(DataType(data_type))
    else:
        data_type_list = None
    schema = Schema(name_list=name_list, type_list=data_type_list)
    return DatasetMeta(
        uuid=dataset_result.uuid,
        name=dataset_result.name,
        data_format=dataset_result.format,
        description=dataset_result.description,
        uri=dataset_result.uri,
        create_time=dataset_result.create_time,
        update_time=dataset_result.update_time,
        schema=schema,
        properties=properties,
        catalog_name=dataset_result.catalog_name,
        catalog_type=dataset_result.catalog_type,
        catalog_database=dataset_result.catalog_database,
        catalog_connection_uri=dataset_result.catalog_connection_uri,
        catalog_table=dataset_result.catalog_table)
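The converter above receives list- and dict-valued columns as their Python
literal strings, and ast.literal_eval (which parses literals only, unlike
eval) turns them back into objects. A standalone round-trip illustration with
made-up values:

import ast

stored_properties = "{'a': 'b'}"  # a dict column as it comes back from the store
stored_name_list = "['a']"        # a list column as it comes back
assert ast.literal_eval(stored_properties) == {'a': 'b'}
assert ast.literal_eval(stored_name_list) == ['a']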
Example #7
def build_ai_graph(node_number, job_number) -> AIGraph:
    graph = AIGraph()
    for i in range(node_number):
        # Nodes are assigned to jobs round-robin: node i belongs to job_{i % job_number}.
        config = JobConfig(job_name='job_{}'.format(i % job_number), job_type='mock')
        if i == 0:
            ai_node = ReadDatasetNode(dataset=DatasetMeta(name='source'))
        elif i == 3:
            ai_node = WriteDatasetNode(dataset=DatasetMeta(name='sink'))
        else:
            ai_node = AINode()
        ai_node.config = config
        graph.nodes[ai_node.node_id] = ai_node

    # Wire two data edges into AINode_4, plus two control-flow dependencies between jobs.
    add_data_edge(graph=graph, to_='AINode_4', from_='ReadDatasetNode_0')
    add_data_edge(graph=graph, to_='AINode_4', from_='WriteDatasetNode_1')
    add_control_edge(graph, 'job_2', 'job_0')
    add_control_edge(graph, 'job_2', 'job_1')

    return graph
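A hypothetical invocation of the helper above, with six nodes spread across
three jobs (the node count follows directly from the loop; the four edge
calls add two data edges and two control edges):

graph = build_ai_graph(node_number=6, job_number=3)
assert len(graph.nodes) == 6  # one node per loop iteration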
Example #8
 def test_dataset_validate(self):
     with job_config('task_1'):
         o = ops.read_dataset(read_dataset_processor=None,
                              dataset_info=DatasetMeta(name='dataset'),
                              name='a')
         ops.dataset_validate(input=o,
                              dataset_validation_processor=None,
                              name='b')
     self.assertEqual(2, len(current_graph().nodes))
     self.assertEqual(1, len(current_graph().edges))
     n = self.get_node_by_name('a')
     self.assertEqual('dataset', n.node_config.get('dataset').name)
 def test_write_node_creation(self):
     node = WriteDatasetNode(dataset=DatasetMeta(name='sink'))
     self.assertEqual('sink', node.node_config['dataset'].name)
 def test_read_node_creation(self):
     node = ReadDatasetNode(dataset=DatasetMeta(name='source'))
     self.assertEqual('source', node.node_config['dataset'].name)