예제 #1
0
def do_export_file(job_id, _data):
    """Export the rows of a stored data table to a delimited text file.

    Each ``(key, value)`` pair yielded by the table's ``collect()`` becomes
    one output line: ``key<delimitor>value``, or just ``key`` when the value
    is falsy. Progress is printed every 2000 lines.

    Args:
        job_id: Identifier handed to ``eggroll.init`` for this session.
        _data: Mapping holding the export settings. Keys read here are
            ``work_mode``, ``table_name``, ``namespace``, ``delimitor``
            (defaults to ``","``) and ``output_path``.

    Raises:
        ValueError: If any step of the export fails. The original exception
            is attached as ``__cause__`` so the real reason is not lost.
    """
    try:
        work_mode = _data.get("work_mode")
        name = _data.get("table_name")
        namespace = _data.get("namespace")
        delimitor = _data.get("delimitor", ",")
        output_path = _data.get("output_path")

        eggroll.init(job_id, work_mode)

        # Resolve the table before opening the output file so that a failed
        # lookup does not create or truncate the destination file.
        data_table = storage.get_data_table(name=name, namespace=namespace)

        with open(os.path.abspath(output_path), "w") as fout:
            print('===== begin to export data =====')
            lines = 0

            for key, value in data_table.collect():
                if not value:
                    fout.write(key + "\n")
                else:
                    fout.write(key + delimitor + value + "\n")

                lines += 1
                if lines % 2000 == 0:
                    print("===== export {} lines =====".format(lines))

            print("===== export {} lines totally =====".format(lines))
            print('===== export data finish =====')
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate, and chain the cause for debugging.
        raise ValueError("cannot export data, please check json file") from exc
예제 #2
0
파일: upload.py 프로젝트: zzzcq/eggroll
                work_mode = job_config.get('work_mode')
                if work_mode is None:
                    work_mode = 0

            if not os.path.exists(input_file_path):
                print("%s is not exist, please check the configure" % (input_file_path))
                sys.exit()

            input_data = read_data(input_file_path, head)
            _namespace, _table_name = generate_table_name(input_file_path)
            if namespace is None:
                namespace = _namespace
            if table_name is None:
                table_name = _table_name
            eggroll.init(mode=work_mode)
            data_table = save_data(input_data, name=table_name, namespace=namespace, partition=partition)
            print("------------load data finish!-----------------")
            print("file: {}".format(input_file_path))
            print("total data_count: {}".format(data_table.count()))
            print("table name: {}, table namespace: {}".format(table_name, namespace))

        except ValueError:
            print('json parse error')
            exit(-102)
        except IOError:
            print('read file error')
            exit(-103)
    except:
        traceback.print_exc()
예제 #3
0
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from eggroll.api import eggroll
from eggroll.api import clustercomm

if __name__ == '__main__':
    eggroll.init("atest")
    clustercomm.init(
        "atest", {
            "local": {
                "role": "guest",
                "party_id": 10001
            },
            "role": {
                "host": [10001, 10002],
                "arbiter": [99999],
                "guest": [10001]
            }
        })

    for _tag in range(0, 1000, 2):
        b = clustercomm.get("RsaIntersectTransferVariable.rsa_pubkey",
예제 #4
0
from eggroll.api.eggroll import init, parallelize
# from eggroll.api.cluster.mock_roll import init, parallelize

import numpy as np


def f(iterator):
    """Add up the value part of every ``(key, value)`` pair in *iterator*.

    The keys are ignored. Accumulation starts from ``0``, so an empty
    iterator yields ``0``.
    """
    total = 0
    for _key, val in iterator:
        total += val
    return total


if __name__ == "__main__":
    # Start the eggroll session with its default arguments.
    init()

    # First table: an array of ones with shape [400, 50], 40 partitions.
    ones = np.ones([400, 50])
    matrix_table = parallelize(ones, partition=40)

    # Identity mapValues; collect once into a dict (result discarded) and
    # once more into a list that is printed.
    identity_table = matrix_table.mapValues(lambda _x: _x)
    dict(identity_table.collect())
    matrix_items = list(identity_table.collect())
    print(matrix_items)

    # Second table: three short strings, 5 partitions.
    letter_table = parallelize(["b", "a", "c"], partition=5)

    # Append "1" to every value, then show the result as a list and a dict.
    suffixed_table = letter_table.mapValues(lambda _x: _x + "1")
    suffixed_items = list(suffixed_table.collect())
    print(suffixed_items)
    suffixed_mapping = dict(suffixed_table.collect())
    print(suffixed_mapping)

    # Finally print the contents of the un-mapped string table.
    letter_items = list(letter_table.collect())
    print(letter_items)
예제 #5
0
def mul(x, y):
    """Return the product ``x * y`` of the two arguments."""
    product = x * y
    return product


class TestMethod(object):
    """Small helper whose ``test`` method squares the length of a value."""

    # Despite the ``Test`` prefix this is a helper, not a test case. Opt out
    # of pytest collection: otherwise pytest would pick up ``test`` and fail
    # while looking for a fixture named ``value``.
    __test__ = False

    def test(self, value):
        """Return ``len(value)`` multiplied by itself."""
        return self.mul(len(value))

    def mul(self, x):
        """Return *x* squared (``x * x``)."""
        return x * x


if __name__ == '__main__':
    # Script entry point: initialise eggroll, create two tables under a fresh
    # namespace and generate a sample data set.
    # Change the flow_id, otherwise the in-memory table may be overwritten.
    eggroll.init(mode=0)
    # A new UUID per run is used as the namespace for both tables.
    ns = str(uuid.uuid1())

    X = eggroll.table('testX7', ns, partition=2)
    Y = eggroll.table('testY7', ns, partition=2)

    # X.destroy()
    # Y.destroy()

    # NOTE(review): b, eta and max_iter look like the bias, learning rate and
    # iteration cap of a training loop that is not visible here -- confirm.
    b = np.array([0])
    eta = 1.2
    max_iter = 100

    # Number of samples requested from make_moons below.
    total_num = 500

    # NOTE(review): make_moons is presumably sklearn.datasets.make_moons
    # (its import is not visible in this excerpt) -- confirm.
    _x, _y = make_moons(total_num, noise=0.25)
    if data_meta_table:
        value_bytes = data_meta_table.get(key, use_serialize=False)
        if value_bytes:
            return json_loads(value_bytes)
        else:
            return None
    else:
        return None


if __name__ == '__main__':
    # Manual smoke test: initialise eggroll with a fresh job id and store a
    # few randomly generated rows through save_data.
    from eggroll.api import eggroll
    import uuid
    import random
    # Hex form of a new UUID, used as the job id for this run.
    job_id = str(uuid.uuid1().hex)
    eggroll.init(job_id=job_id, mode=0)

    table_name = "test_example"
    table_namespace = "storage_test_example"

    def gen_test_data(row_count, column_count):
        """Yield ``row_count`` pairs of (UUID hex key, comma-joined ints).

        Each value joins ``column_count - 1`` random integers drawn from
        1..100 inclusive.
        """
        for r in range(row_count):
            k = uuid.uuid1().hex
            v = ','.join(
                [str(random.randint(1, 100)) for i in range(column_count - 1)])
            yield k, v

    # NOTE(review): save_data is defined outside this excerpt; it is assumed
    # to accept an iterable of (key, value) pairs -- confirm.
    data_table = save_data(gen_test_data(5, 10),
                           name=table_name,
                           namespace=table_namespace)