Beispiel #1
0
    def receiving_loop(self):
        while True:
            if self._count == 0:
                time_start = time.time()
                self._matrix = server(self._ipAddr)
                print("Updating based on received information...")
                print("item: \n", self._matrix)
                print("Shape of received item:", np.shape(self._matrix))
                self._vect = np.ones((int(np.shape(self._matrix)[1] / 2), 1))
                self._matrixReady = True

                # Time Stamp
                time_end = time.time()
                self._time_receive.append(time_end - time_start)
                print("Receiving time: ", time_end - time_start)
                time.sleep(3)
            else:
                time_start = time.time()
                self._vect = server(self._ipAddr)
                print("Updating based on received information...")
                print("item: \n", self._vect)
                print("Shape of received item:", np.shape(self._vect))
                if self._vect == "All done":
                    print("All done")
                    exit()
                self._matrixReady = True

                # Time Stamp
                time_end = time.time()
                self._time_receive.append(time_end - time_start)
                print("Receiving time: ", time_end - time_start)
                time.sleep(3)
    def receivingLoop(self):
        while True:
            item = server(self._ipAddr)
            print("Updating based on received information...")
            print("item: \n", item)
            print("Shape of received item:")
            print(np.array(item).shape)

            # assume that item contains multiple partitions of same size
            # First column will contain general information about item received (e.g. how many partitions)
            self._no_partitions = int(item[0, 0])
            self._x = item[:, 1]
            self._matrix = item[:, 2:]
            self._matrixReady = True
            time.sleep(3)
Beispiel #3
0
def run():
    while True:

        # node detection: determine IP address
        #nodeIP = socket.gethostbyname_ex("raspberrypi.local")#[-1]
        '''Currently unable to detect own ip do to identical names. Must be changed for each node'''
        nodeIP = '192.168.0.106'
        print(nodeIP)
        # listen for info from aggregator.
        # determine node number (n) and total number of nodes (Num) from agg Xmission
        # N is list of node
        # win,tau,k,d,N,aggIP = node_server.server(node_ip)
        info = node_server.server(nodeIP)

        n = info.node_dict[nodeIP]  #node number
        Num = len(info.node_dict)  # total number of nodes
        d = info.d  # number data points/node
        tau = info.tau  # number of local iterations
        k = info.k  # global iteration number
        host = info.host  # aggregator IP
        shuff = info.shuff  # Shuffling type: 0=none,1=random,2=roundrobin,3=segShift

        # these can be pulled from the agg if desired
        weight = 1
        eta = .01
        lam = 1

        # pull data_pts for global update
        # 	Currently pulling the entire dataset for each node but using a part.
        w = info.w
        D = d * Num  # Total data points
        filepath = r"train.csv"
        data_pts = pd.read_csv(filepath, skiprows=(k * D), nrows=(D)).values

        # SVM
        w, fn = SVM.svm(w,
                        data_pts,
                        eta,
                        lam,
                        tau,
                        d,
                        weight,
                        shuff=shuff,
                        N=Num,
                        n=n)

        # send w to agg
        node_client.client(w, fn, host)
Beispiel #4
0
 def __init__(self):
     self.server = server.server(args)
     self.server.create()
     self.server.serve()
Beispiel #5
0
def run():
    os.system('hostname -I > output.txt'
              )  # these 6 commands reads ip address from linux OS
    ip_file = open('output.txt', 'r')
    n_ip = ip_file.readline()
    nodeIP = n_ip.rstrip(' \n')
    os.remove('output.txt')
    pad_value = 1
    mcast_recv1.kill = False
    n = e.cycle5(int(nodeIP[-1]) + 1)
    e.node = n
    recv_nodes = e.node_tracker()
    recv_stages = (1, 1, 2)
    threads = {}
    cache_q = queue.Queue(
    )  # use of the fifo queue structure ensures safe exchange of data between threads
    k = 1  # global counter
    for i in range(3):
        # create and start separate threads to receive from different nodes (node, stage, q, priority):
        threads[i] = threading.Thread(target=mcast_recv1.m_recv,
                                      args=(recv_nodes[i], recv_stages[i],
                                            cache_q, i + 1),
                                      daemon=False)
        threads[i].start()
        print('starting thread ', i + 1)

    while True:  # main running loop

        print('I am ', nodeIP)
        print('Waiting for Agg')
        # listen for info from aggregator.
        # determine node number (n) and total number of nodes (Num) from agg Xmission
        # N is list of node
        # win,tau,k,d,N,aggIP = node_server.server(node_ip)
        while True:
            info = node_server.server(nodeIP)
            if info == 'resend':
                print('## RESENDING PROTOCOL ##')
                s1_data = pad.pad(e.stage1_send(), pad_value)
                s2_data = pad.pad(e.stage2_send(), pad_value)
                mcast_send.send(n, 1, s1_data)  # send stage1 partition
                mcast_send.send(n, 2, s2_data)  # send stage2 partition
            else:
                break
        if info == 'restart':
            print('***Restarting Program***')
            break
        # n = info.node_dict[nodeIP] + 1  # node number (1,2,3,4,5)
        print('My node number is ', n)
        Num = len(info.node_dict)  # total number of nodes (should be 5)
        d = info.d  # number data points/node
        tau = info.tau  # number of local iterations
        k = info.k  # global iteration number
        K = info.K  # total global iterations
        pad_value = info.pad_value  # padding value of data for testing

        if k == 0:
            print('beginning session')
            e.set_flag = False
            mcast_recv1.prev = 0

        # we are starting over, we need to empty out the queue
        if not cache_q.empty():  #
            for i in range(cache_q.qsize()):
                cache_q.get()
        host = info.host  # aggregator IP
        shuff = info.shuff  # number of shuffles per global iteration

        # these can be pulled from the agg if desired
        weight = 1
        eta = .01
        lam = 1

        # pull data_pts for global update
        w = info.w
        D = d * Num  # Total data points
        filepath = r"train.csv"

        # if we didn't set the encoder, then set it here
        if not e.set_flag:
            e.create_workspace(n, filepath,
                               D)  # initialization of the workspace file
        shuffcount = 1  # compare to iterations
        # this variable keeps track of which data part is needed next for receive, there are 3 needed for each recv
        cpart = 1
        time_init = time.time()
        ans_data = [(0, 'no data'), (0, 'no data'), (0, 'no data')]
        # wait for other nodes to start receiving
        # time.sleep(0.1*(5-n))

        # this is the main running loop
        while shuffcount <= shuff:  # main coded shuffling running loop (for shuff iterations)
            s1_data = pad.pad(e.stage1_send(), pad_value)
            s2_data = pad.pad(e.stage2_send(), pad_value)
            mcast_send.send(n, 1, s1_data)  # send stage1 partition
            mcast_send.send(n, 2, s2_data)  # send stage2 partition
            data_pts = e.get_work_file(
            )  # data points for current iteration to use for training
            print('training')
            data_pts = data_pts.astype('float64')

            # Do the training time. If it gets hung up for too long, restart.
            w, fn = SVM.svm(w,
                            data_pts,
                            eta,
                            lam,
                            tau,
                            d,
                            weight,
                            shuff=0,
                            N=Num,
                            n=n - 1)  #train on tau iterations data_pts d
            try_time = time.time()

            # you need to receive all 3 parts from your queue. Loop until you have everything you need.
            while cpart < 4:
                cp = cpart  # need this to ensure fifo queue is completely cycled before searching for the next part
                for i in range(cache_q.qsize()
                               ):  # cycle through the queue for receive data
                    a = cache_q.get()  # tuple in form (part integer, DATA)
                    if a[0] == cp:  # if it is the part needed adds part to ans_data and increments to next part
                        cp = 'disabled'
                        ans_data[cpart - 1] = a
                        print('got part ', cpart)
                        cpart = cpart + 1
                    else:
                        cache_q.put(a)

                # Evaluate a timeout. If we timeout, break the loop.
                #if time.time() >= try_time + 5:  # resend if it doesnt find data after 5 seconds
                #  print("Resending, couldn't find part ", cpart)
                # break
            if time.time() >= (
                    time_init + 5 * 60
            ):  # times out 5*60s of not receiving all 3 pieces of data
                print("Error: threads timed out")
                exit(-1)

            # Nothing bad happened. We can use the data.
            if cpart >= 4:  # receive condition triggers when all data is ready
                print('data received')
                e.recv(pad.unpad(ans_data[0][1], int(d / 4)),
                       pad.unpad(ans_data[1][1], int(d / 4)),
                       pad.unpad(ans_data[2][1], int(d / 4)))
                cpart = 1
                time_init = time.time()
                shuffcount = shuffcount + 1
        # send w to agg
        print('sending cleanup data')
        s1_data = pad.pad(e.stage1_send(), pad_value)
        s2_data = pad.pad(e.stage2_send(), pad_value)
        mcast_send.send(n, 1, s1_data)  # send stage1 partition
        mcast_send.send(n, 2, s2_data)  # send stage2 partition
        node_client.client(w, fn, host)
        k = k + 1

    if not mcast_recv1.kill:  # kills threads by ending underlying function(s)
        mcast_recv1.kill = True
        print('Threads killed')