Exemple #1
0
def do_export_file(job_id, _data):
    """Export a data table to a delimited text file.

    Reads ``work_mode``, ``name``, ``namespace``, ``delimitor`` (defaults to
    ``","``) and ``output_path`` from ``_data``, initialises eggroll for
    ``job_id`` and writes one line per ``(key, value)`` pair yielded by the
    table's ``collect()``: the key alone when the value is falsy, otherwise
    ``key + delimitor + value``. Progress is printed every 2000 lines.

    Args:
        job_id: identifier passed to ``eggroll.init``.
        _data: mapping-like object exposing ``get`` with the settings above.

    Raises:
        ValueError: if any step fails. The underlying exception is attached
            as ``__cause__`` so the real failure is not lost.
    """
    try:
        work_mode = _data.get("work_mode")
        name = _data.get("name")
        namespace = _data.get("namespace")
        delimitor = _data.get("delimitor", ",")
        output_path = _data.get("output_path")

        eggroll.init(job_id, work_mode)

        # Resolve the table before opening the output file so that a failed
        # lookup does not create or truncate the file.
        data_table = storage.get_data_table(name=name, namespace=namespace)

        with open(os.path.abspath(output_path), "w") as fout:
            print('===== begin to export data =====')
            lines = 0

            for lines, (key, value) in enumerate(data_table.collect(), 1):
                if not value:
                    fout.write(key + "\n")
                else:
                    fout.write(key + delimitor + value + "\n")

                if lines % 2000 == 0:
                    print("===== export {} lines =====".format(lines))

            print("===== export {} lines totally =====".format(lines))
            print('===== export data finish =====')
    except Exception as err:
        # Only ordinary errors are translated; KeyboardInterrupt and
        # SystemExit are allowed to propagate unchanged.
        raise ValueError("cannot export data, please check json file") from err
Exemple #2
0
    def read_data(self, table_name, namespace, mode="fit"):
        """Read a dense table and turn it into data instances.

        The table is fetched with ``storage.get_data_table`` and checked with
        ``abnormal_detection.empty_table_detection``. Each value is split on
        ``self.delimitor``. When ``self.with_label`` is set, the column at
        ``self.label_idx`` becomes the label and the remaining columns become
        the features; otherwise every column is a feature and no labels are
        produced.

        Args:
            table_name: name of the table to load.
            namespace: namespace of the table to load.
            mode: ``"fit"`` calls ``self.fit``; any other value calls
                ``self.transform``.

        Returns:
            The object returned by ``self.fit`` / ``self.transform``, after
            ``set_schema`` has been called on it with ``self.header``.

        Raises:
            ValueError: if ``self.label_idx`` is not an integer, or if the
                data shape is falsy or not greater than ``self.label_idx``.
        """
        input_data = storage.get_data_table(table_name, namespace)
        LOGGER.info("start to read dense data and change data to instance")

        abnormal_detection.empty_table_detection(input_data)

        input_data_features = None
        input_data_labels = None

        if self.with_label:
            # bool is a subclass of int but is not a meaningful column index.
            if not isinstance(self.label_idx, int) or isinstance(self.label_idx, bool):
                raise ValueError("label index should be integer")

            data_shape = data_overview.get_data_shape(input_data)
            if not data_shape or self.label_idx >= data_shape:
                raise ValueError("input data's value is empty, it does not contain a label")

            def extract_features(value):
                # A single column is the label itself, so no features remain.
                if data_shape == 1:
                    return []
                columns = value.split(self.delimitor, -1)
                label_pos = self.label_idx
                # Normalise a negative index against this row so the slice
                # below drops exactly the label column. Without this,
                # label_idx == -1 would turn the second slice into [0:] and
                # re-include every column.
                if label_pos < 0:
                    label_pos += len(columns)
                return columns[:label_pos] + columns[label_pos + 1:]

            def extract_label(value):
                return value.split(self.delimitor, -1)[self.label_idx]

            input_data_features = input_data.mapValues(extract_features)
            input_data_labels = input_data.mapValues(extract_label)

        else:
            input_data_features = input_data.mapValues(
                lambda value: [] if not value else value.split(self.delimitor, -1))

        if mode == "fit":
            data_instance = self.fit(input_data_features, input_data_labels, table_name, namespace)
        else:
            data_instance = self.transform(input_data_features, input_data_labels)

        set_schema(data_instance, self.header)

        return data_instance
Exemple #3
0
def dtable(table_func):
    """Handle a table request selected by ``table_func``.

    For ``'tableInfo'`` the table name and namespace are resolved from the
    request JSON via ``get_table_info``, the table is looked up, and a JSON
    result with its name, namespace and key count is returned (count is 0
    when the lookup yields a falsy table). Any other ``table_func`` returns
    an empty JSON result.
    """
    config = request.json

    # Anything other than 'tableInfo' gets the default empty response.
    if table_func != 'tableInfo':
        return get_json_result()

    table_name, namespace = get_table_info(config=config, create=config.get('create', False))
    table = storage.get_data_table(name=table_name, namespace=namespace)
    table_key_count = table.count() if table else 0
    payload = {'table_name': table_name, 'namespace': namespace, 'count': table_key_count}
    return get_json_result(data=payload)
Exemple #4
0
    def read_data(self, table_name, namespace, mode="fit"):
        """Read a sparse table and turn it into data instances.

        The table is fetched with ``storage.get_data_table`` and checked with
        ``abnormal_detection.empty_table_detection``. It is then passed to
        ``self.fit`` when ``mode`` is ``"fit"`` and to ``self.transform``
        otherwise. ``set_schema`` is called on the result with
        ``self.header`` before it is returned.
        """
        table = storage.get_data_table(table_name, namespace)
        LOGGER.info("start to read sparse data and change data to instance")

        abnormal_detection.empty_table_detection(table)

        # Pick the conversion routine from the requested mode.
        convert = self.fit if mode == "fit" else self.transform
        instances = convert(table)

        set_schema(instances, self.header)
        return instances
Exemple #5
0
    def read_data(self, table_name, namespace, mode="fit"):
        """Read a sparse table and turn it into data instances.

        The table is fetched with ``storage.get_data_table`` and checked with
        ``abnormal_detection.empty_table_detection``. It is then passed to
        ``self.fit`` when ``mode`` is ``"fit"`` and to ``self.transform``
        otherwise. ``set_schema`` is called on the result with
        ``self.header`` before it is returned.

        Raises:
            ValueError: if ``data_overview.get_data_shape`` returns a falsy
                value for the table.
        """
        table = storage.get_data_table(table_name, namespace)
        LOGGER.info("start to read sparse data and change data to instance")

        abnormal_detection.empty_table_detection(table)

        shape = data_overview.get_data_shape(table)
        if not shape:
            raise ValueError("input data's value is empty, it does not contain a label")

        # Pick the conversion routine from the requested mode.
        convert = self.fit if mode == "fit" else self.transform
        instances = convert(table)

        set_schema(instances, self.header)
        return instances