def do_export_file(job_id, _data):
    """Export a data table to a delimited text file.

    The settings are read from ``_data`` with ``.get``:

    * ``work_mode`` -- passed to ``eggroll.init`` together with ``job_id``
    * ``table_name`` / ``namespace`` -- identify the table fetched through
      ``storage.get_data_table``
    * ``delimitor`` -- separator written between key and value
      (defaults to ``","``)
    * ``output_path`` -- destination file; made absolute and opened for
      writing (an existing file is truncated)

    One line is written per record returned by ``data_table.collect()``:
    the key alone when the value is falsy, otherwise
    ``key + delimitor + value``.  Progress is printed every 2000 lines.

    Raises:
        ValueError: if any step of the export fails.  The underlying
            exception is attached as ``__cause__`` so the real reason is
            not lost.
    """
    try:
        work_mode = _data.get("work_mode")
        name = _data.get("table_name")
        namespace = _data.get("namespace")
        delimitor = _data.get("delimitor", ",")
        output_path = _data.get("output_path")

        eggroll.init(job_id, work_mode)

        with open(os.path.abspath(output_path), "w") as fout:
            data_table = storage.get_data_table(name=name, namespace=namespace)
            print('===== begin to export data =====')
            lines = 0
            for key, value in data_table.collect():
                if not value:
                    # Key-only record: no delimiter, no value column.
                    fout.write(key + "\n")
                else:
                    fout.write(key + delimitor + value + "\n")
                lines += 1
                if lines % 2000 == 0:
                    print("===== export {} lines =====".format(lines))
            print("===== export {} lines totally =====".format(lines))
            print('===== export data finish =====')
    except Exception as err:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not rewritten as ValueError,
        # and chain the original error for debuggability.
        raise ValueError("cannot export data, please check json file") from err
work_mode = job_config.get('work_mode') if work_mode is None: work_mode = 0 if not os.path.exists(input_file_path): print("%s is not exist, please check the configure" % (input_file_path)) sys.exit() input_data = read_data(input_file_path, head) _namespace, _table_name = generate_table_name(input_file_path) if namespace is None: namespace = _namespace if table_name is None: table_name = _table_name eggroll.init(mode=work_mode) data_table = save_data(input_data, name=table_name, namespace=namespace, partition=partition) print("------------load data finish!-----------------") print("file: {}".format(input_file_path)) print("total data_count: {}".format(data_table.count())) print("table name: {}, table namespace: {}".format(table_name, namespace)) except ValueError: print('json parse error') exit(-102) except IOError: print('read file error') exit(-103) except: traceback.print_exc()
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # from eggroll.api import eggroll from eggroll.api import clustercomm if __name__ == '__main__': eggroll.init("atest") clustercomm.init( "atest", { "local": { "role": "guest", "party_id": 10001 }, "role": { "host": [10001, 10002], "arbiter": [99999], "guest": [10001] } }) for _tag in range(0, 1000, 2): b = clustercomm.get("RsaIntersectTransferVariable.rsa_pubkey",
from eggroll.api.eggroll import init, parallelize
# from eggroll.api.cluster.mock_roll import init, parallelize
import numpy as np


def f(iterator):
    """Add up the values of the ``(key, value)`` pairs yielded by ``iterator``.

    The running total starts at ``0``; keys are ignored.
    """
    total = 0
    for _key, val in iterator:
        total += val
    return total


if __name__ == "__main__":
    init()

    # Case 1: a 400 x 50 array of ones, parallelized with 40 partitions and
    # passed through an identity mapValues.
    _matrix = np.ones([400, 50])
    _table = parallelize(_matrix, partition=40)
    c = _table.mapValues(lambda _x: _x)
    # Collected once into a dict (result discarded), then again for printing.
    dict(c.collect())
    print(list(c.collect()))

    # Case 2: three short strings with 5 partitions; mapValues appends "1"
    # to every value.
    _table = parallelize(["b", "a", "c"], partition=5)
    a = _table.mapValues(lambda _x: _x + "1")
    print(list(a.collect()))
    print(dict(a.collect()))
    # The source table itself, for comparison with the mapped output.
    print(list(_table.collect()))
def mul(x, y): return x * y class TestMethod(object): def test(self, value): return self.mul(len(value)) def mul(self, x): return x * x if __name__ == '__main__': # 修改flow_id 否则内存表可能被覆盖 eggroll.init(mode=0) ns = str(uuid.uuid1()) X = eggroll.table('testX7', ns, partition=2) Y = eggroll.table('testY7', ns, partition=2) # X.destroy() # Y.destroy() b = np.array([0]) eta = 1.2 max_iter = 100 total_num = 500 _x, _y = make_moons(total_num, noise=0.25)
if data_meta_table: value_bytes = data_meta_table.get(key, use_serialize=False) if value_bytes: return json_loads(value_bytes) else: return None else: return None if __name__ == '__main__': from eggroll.api import eggroll import uuid import random job_id = str(uuid.uuid1().hex) eggroll.init(job_id=job_id, mode=0) table_name = "test_example" table_namespace = "storage_test_example" def gen_test_data(row_count, column_count): for r in range(row_count): k = uuid.uuid1().hex v = ','.join( [str(random.randint(1, 100)) for i in range(column_count - 1)]) yield k, v data_table = save_data(gen_test_data(5, 10), name=table_name, namespace=table_namespace)