Ejemplo n.º 1
0
def upload_csv(nid, path, table_name):
    """Read the file at ``path`` and save its contents as ``table_name``.

    The data is loaded with ``csv_read_data`` and written with ``save_data``
    into the namespace that ``gen_data_namespace`` derives from ``nid``.
    ``error_if_exist=True`` is passed to ``save_data``.

    Returns:
        Whatever ``save_data`` returns.
    """
    eggroll_init()
    rows = csv_read_data(path)
    target_namespace = gen_data_namespace(nid)
    return save_data(
        rows,
        name=table_name,
        namespace=target_namespace,
        error_if_exist=True,
    )
Ejemplo n.º 2
0
 def save_output_data_table(self, data_table, data_name: str = 'component'):
     """Persist an output table and record where the copy was stored.

     When ``data_table`` is truthy it is copied with ``save_as`` into its own
     namespace under the name ``<original name>_persistent``. The table's
     schema and header are saved as meta for that copy, and a
     ``{data_name: {'name': ..., 'namespace': ...}}`` entry describing the
     copy is written to the tracking output table. When ``data_table`` is
     falsy, an empty set of entries is written instead.
     """
     # Start from "nothing to record" and fill in only when a table was given.
     data_table_info = {}
     if data_table:
         persistent_name = '{}_persistent'.format(data_table._name)
         persistent_table = data_table.save_as(
             namespace=data_table._namespace,
             name=persistent_name)

         table_meta = {
             'schema': data_table.schema,
             'header': data_table.schema.get('header', []),
         }
         storage.save_data_table_meta(
             table_meta,
             data_table_namespace=persistent_table._namespace,
             data_table_name=persistent_table._name)

         location = {
             'name': persistent_table._name,
             'namespace': persistent_table._namespace,
         }
         data_table_info = {data_name: location}

     storage.save_data(
         data_table_info.items(),
         name=Tracking.output_table_name('data'),
         namespace=self.table_namespace,
         partition=48)
Ejemplo n.º 3
0
                work_mode = data.get('work_mode')
                if work_mode is None:
                    work_mode = 0

            if not os.path.exists(input_file_path):
                print("%s is not exist, please check the configure" %
                      (input_file_path))
                sys.exit()

            input_data = read_data(input_file_path, head)
            _namespace, _table_name = generate_table_name(input_file_path)
            if namespace is None:
                namespace = _namespace
            if table_name is None:
                table_name = _table_name
            eggroll.init(mode=work_mode)
            save_data(input_data,
                      name=table_name,
                      namespace=namespace,
                      partition=partition)

        except ValueError:
            print('json parse error')
            exit(-102)
        except IOError:
            print('read file error')
            exit(-103)
    except:
        traceback.print_exc()
Ejemplo n.º 4
0
            if not os.path.exists(input_file_path):
                print("%s is not exist, please check the configure" %
                      (input_file_path))
                sys.exit()

            _namespace, _table_name = generate_table_name(input_file_path)
            if namespace is None:
                namespace = _namespace
            if table_name is None:
                table_name = _table_name
            eggroll.init(job_id=args.job_id, mode=work_mode)
            input_data = read_data(input_file_path, table_name, namespace,
                                   head)
            data_table = storage.save_data(input_data,
                                           name=table_name,
                                           namespace=namespace,
                                           partition=partition)
            print("------------load data finish!-----------------")
            print("file: {}".format(input_file_path))
            print("total data_count: {}".format(data_table.count()))
            print("table name: {}, table namespace: {}".format(
                table_name, namespace))

        except ValueError:
            print('json parse error')
            exit(-102)
        except IOError:
            print('read file error')
            exit(-103)
    except:
        traceback.print_exc()
Ejemplo n.º 5
0
def save_data(input_data, table_name, namespace):
    """Forward ``input_data`` to ``storage.save_data``.

    ``table_name`` and ``namespace`` are passed positionally, in that order,
    after ``input_data``. The result of ``storage.save_data`` is not
    returned to the caller.
    """
    storage.save_data(input_data, table_name, namespace)
    return None
Ejemplo n.º 6
0
def load_file(load_file_path):
    """Load a data file into storage as described by a JSON config file.

    The JSON document at ``load_file_path`` is read for these keys:

    * ``file`` (required): path of the input data file.
    * ``head``: ``0`` disables the header flag passed to ``read_data``; any
      other value, or a missing key, leaves it enabled.
    * ``partition``: must satisfy ``0 < partition <= MAX_PARTITION_NUM``.
      Falls back to 1 when missing or not comparable to a number.
    * ``table_name`` / ``namespace``: when missing, the values produced by
      ``generate_table_name(input_file_path)`` are used.
    * ``work_mode``: converted with ``int``; defaults to 0.

    The process is terminated via ``sys.exit`` when:

    * ``partition`` is out of range, ``file`` is missing, or the input file
      does not exist (exit without a status argument);
    * a ``ValueError`` is raised, e.g. invalid JSON (status -102);
    * an ``IOError`` is raised, e.g. unreadable config (status -103).
    """
    try:
        # Parse first and release the file handle before doing anything else.
        with open(load_file_path, 'r') as f:
            data = json.load(f)

        # Only lookup failures are handled below. A bare ``except:`` would
        # also swallow the SystemExit raised by ``sys.exit()`` and turn a
        # fatal configuration error into a silent fallback.
        try:
            input_file_path = data['file']
        except (KeyError, TypeError):
            traceback.print_exc()
            input_file_path = None

        head = True
        try:
            read_head = data['head']
        except (KeyError, TypeError):
            print("'head' in .json should be 0 or 1, set head to 1")
        else:
            # Only an explicit 0 turns the header flag off.
            if read_head == 0:
                head = False

        try:
            partition = data['partition']
            bad_partition = partition <= 0 or partition > MAX_PARTITION_NUM
        except (KeyError, TypeError):
            print("set partition to 1")
            partition = 1
            bad_partition = False
        if bad_partition:
            # Checked outside the ``try`` so the exit cannot be intercepted.
            print(
                "Error number of partition, it should between %d and %d"
                % (0, MAX_PARTITION_NUM))
            sys.exit()

        try:
            table_name = data['table_name']
        except (KeyError, TypeError):
            table_name = None
            print(
                "not setting table_name or setting error, set table_name according to current time"
            )

        try:
            namespace = data['namespace']
        except (KeyError, TypeError):
            namespace = None
            print(
                "not setting namespace or setting error, set namespace according to input file name"
            )

        work_mode = data.get('work_mode')
        work_mode = 0 if work_mode is None else int(work_mode)

        if input_file_path is None:
            # Without this guard os.path.exists(None) raises TypeError.
            print("'file' is not set in %s, please check the configure" %
                  (load_file_path))
            sys.exit()
        if not os.path.exists(input_file_path):
            print("%s is not exist, please check the configure" %
                  (input_file_path))
            sys.exit()

        input_data = read_data(input_file_path, head)
        eggroll.init(mode=work_mode)
        _namespace, _table_name = generate_table_name(input_file_path)
        if namespace is None:
            namespace = _namespace
        if table_name is None:
            table_name = _table_name
        save_data(input_data, table_name, namespace, partition, work_mode)

    except ValueError:
        print('json parse error')
        sys.exit(-102)
    except IOError:
        print('read file error')
        sys.exit(-103)