def upload_csv(nid, path, table_name):
    """Read the CSV file at ``path`` and save its rows as a new table.

    Args:
        nid: Identifier handed to ``gen_data_namespace`` to obtain the
            namespace the table is stored under.
        path: Location of the CSV file, read through ``csv_read_data``.
        table_name: Name given to the stored table.

    Returns:
        Whatever ``save_data`` returns. The call passes
        ``error_if_exist=True``.
    """
    eggroll_init()
    records = csv_read_data(path)
    return save_data(
        records,
        name=table_name,
        namespace=gen_data_namespace(nid),
        error_if_exist=True,
    )
def save_output_data_table(self, data_table, data_name: str = 'component'):
    """Persist an output table and record where it was stored.

    When ``data_table`` is truthy it is copied with ``save_as`` under the
    same namespace and the name ``<original name>_persistent``; its schema
    and header are written through ``storage.save_data_table_meta``; and a
    ``{data_name: {'name': ..., 'namespace': ...}}`` entry describing the
    copy is built. When ``data_table`` is falsy no entry is built.

    In both cases the (possibly empty) entries are written with
    ``storage.save_data`` to the table named by
    ``Tracking.output_table_name('data')`` in ``self.table_namespace``.

    Args:
        data_table: The table to persist, or a falsy value for "no output".
        data_name: Key under which the persisted table is recorded.
    """
    data_table_info = {}
    if data_table:
        persistent_table = data_table.save_as(
            namespace=data_table._namespace,
            name='{}_persistent'.format(data_table._name))

        table_meta = {
            'schema': data_table.schema,
            # Header defaults to an empty list when the schema has none.
            'header': data_table.schema.get('header', []),
        }
        storage.save_data_table_meta(
            table_meta,
            data_table_namespace=persistent_table._namespace,
            data_table_name=persistent_table._name)

        location = {
            'name': persistent_table._name,
            'namespace': persistent_table._namespace,
        }
        data_table_info = {data_name: location}

    storage.save_data(
        data_table_info.items(),
        name=Tracking.output_table_name('data'),
        namespace=self.table_namespace,
        partition=48)
work_mode = data.get('work_mode') if work_mode is None: work_mode = 0 if not os.path.exists(input_file_path): print("%s is not exist, please check the configure" % (input_file_path)) sys.exit() input_data = read_data(input_file_path, head) _namespace, _table_name = generate_table_name(input_file_path) if namespace is None: namespace = _namespace if table_name is None: table_name = _table_name eggroll.init(mode=work_mode) save_data(input_data, name=table_name, namespace=namespace, partition=partition) except ValueError: print('json parse error') exit(-102) except IOError: print('read file error') exit(-103) except: traceback.print_exc()
if not os.path.exists(input_file_path): print("%s is not exist, please check the configure" % (input_file_path)) sys.exit() _namespace, _table_name = generate_table_name(input_file_path) if namespace is None: namespace = _namespace if table_name is None: table_name = _table_name eggroll.init(job_id=args.job_id, mode=work_mode) input_data = read_data(input_file_path, table_name, namespace, head) data_table = storage.save_data(input_data, name=table_name, namespace=namespace, partition=partition) print("------------load data finish!-----------------") print("file: {}".format(input_file_path)) print("total data_count: {}".format(data_table.count())) print("table name: {}, table namespace: {}".format( table_name, namespace)) except ValueError: print('json parse error') exit(-102) except IOError: print('read file error') exit(-103) except: traceback.print_exc()
def save_data(input_data, table_name, namespace):
    """Forward ``input_data`` to ``storage.save_data``.

    Args:
        input_data: The data to store, passed through unchanged.
        table_name: Name of the destination table.
        namespace: Namespace of the destination table.

    Returns:
        None; the result of ``storage.save_data`` is not propagated.
    """
    storage.save_data(input_data, name=table_name, namespace=namespace)
def load_file(load_file_path):
    """Load a data file into a table as described by a JSON config file.

    The config at ``load_file_path`` is read with ``json.load`` and the
    following keys are consulted:

    * ``file``: path of the input data file. If it is missing, not a
      string, or does not exist on disk, a message is printed and the
      process exits.
    * ``head``: ``0`` sets the ``head`` flag to ``False``, ``1`` to
      ``True``; the flag defaults to ``True`` and is forwarded to
      ``read_data``.
    * ``partition``: must satisfy ``0 < partition <= MAX_PARTITION_NUM``,
      otherwise the process exits. Falls back to ``1`` when the key is
      absent or the value cannot be compared with a number.
    * ``table_name`` / ``namespace``: destination table. A value that is
      not supplied is taken from ``generate_table_name(input_file_path)``.
    * ``work_mode``: converted with ``int()``; defaults to ``0``.

    Args:
        load_file_path: Path of the JSON config file.

    Raises:
        SystemExit: with code ``-102`` on any ``ValueError`` raised while
            loading (including malformed JSON), ``-103`` on any
            ``IOError``, and with no code when the input file or the
            partition count is invalid.
    """
    try:
        with open(load_file_path, 'r') as f:
            data = json.load(f)

        input_file_path = None
        try:
            input_file_path = data['file']
        except (KeyError, TypeError):
            traceback.print_exc()

        head = True
        try:
            read_head = data['head']
            if read_head == 0:
                head = False
            elif read_head == 1:
                head = True
        except (KeyError, TypeError):
            print("'head' in .json should be 0 or 1, set head to 1")

        # Validate outside the try/except so the exit below cannot be
        # swallowed by the fallback-to-default handler.
        try:
            partition = data['partition']
            invalid_partition = (partition <= 0
                                 or partition > MAX_PARTITION_NUM)
        except (KeyError, TypeError):
            print("set partition to 1")
            partition = 1
            invalid_partition = False
        if invalid_partition:
            print(
                "Error number of partition, it should between %d and %d" %
                (0, MAX_PARTITION_NUM))
            sys.exit()

        table_name = None
        try:
            table_name = data['table_name']
        except (KeyError, TypeError):
            print(
                "not setting table_name or setting error, set table_name according to current time"
            )

        namespace = None
        try:
            namespace = data['namespace']
        except (KeyError, TypeError):
            print(
                "not setting namespace or setting error, set namespace according to input file name"
            )

        work_mode = data.get('work_mode')
        work_mode = 0 if work_mode is None else int(work_mode)

        # A missing or non-string path would make os.path.exists raise
        # TypeError (or probe a file descriptor for an int); treat it the
        # same as a file that does not exist.
        if (not isinstance(input_file_path, str)
                or not os.path.exists(input_file_path)):
            print("%s is not exist, please check the configure" %
                  (input_file_path))
            sys.exit()

        input_data = read_data(input_file_path, head)
        eggroll.init(mode=work_mode)

        _namespace, _table_name = generate_table_name(input_file_path)
        if namespace is None:
            namespace = _namespace
        if table_name is None:
            table_name = _table_name

        save_data(input_data, table_name, namespace, partition, work_mode)
    except ValueError:
        # Also reached for ValueErrors raised after the JSON was parsed
        # (e.g. int(work_mode)); the message is kept for compatibility.
        print('json parse error')
        sys.exit(-102)
    except IOError:
        print('read file error')
        sys.exit(-103)