Example #1
0
def parallel_computation(values,
                         ranks,
                         to_compute,
                         output_file,
                         threads=1,
                         mode="all2all"):
    """
    Compute correlations in a worker pool and write them to output_file.

    ``values`` and ``ranks`` are copied into shared ctypes arrays that every
    worker receives through ``_init_parallel``. ``to_compute`` is processed
    in groups of 50000 items; the results of each group are dumped to a
    scratch file, which is finally handed to ``sort_and_write``.

    Params:
        values: numpy array copied to shared memory.
        ranks: numpy array copied to shared memory.
        to_compute: items handed to ``parallel_correlations``.
        output_file: destination passed to ``sort_and_write``.
        threads: number of worker processes in the pool.
        mode: forwarded to ``parallel_correlations``, ``dump_correlations``
            and ``sort_and_write``.
    Return: 0 once ``sort_and_write`` has returned.
    """
    import os

    # Create ctypes objects from the numpy arrays and copy them into shared
    # memory. lock=False: access to the shared arrays is not synchronized.
    tmp_values = np.ctypeslib.as_ctypes(values)
    shared_values = sharedctypes.Array(tmp_values._type_,
                                       tmp_values,
                                       lock=False)

    tmp_ranks = np.ctypeslib.as_ctypes(ranks)
    shared_ranks = sharedctypes.Array(tmp_ranks._type_, tmp_ranks, lock=False)

    # pool.map supports a single iterable argument, so bind mode up front.
    launch_para_correlations = partial(parallel_correlations, mode=mode)

    # Write straight through the temporary file object instead of reopening
    # it by name, so that no second, never-closed handle is left behind.
    temp_fh = tempfile.NamedTemporaryFile(mode="w", delete=False)
    try:
        with temp_fh:
            # Each worker process calls _init_parallel(*initargs) on start.
            with Pool(processes=threads,
                      initializer=_init_parallel,
                      initargs=(
                          shared_values,
                          shared_ranks,
                      )) as pool:
                # The elements to compute are split in batches of size 50000.
                for num_pass, group_to_compute in enumerate(
                        grouping(50000, to_compute), start=1):
                    eprint("Pass %d" % num_pass)
                    # Blocks until the whole group is done (chunksize 100).
                    nested_result = pool.map(launch_para_correlations,
                                             group_to_compute, 100)
                    result = [
                        item for sublist in nested_result for item in sublist
                    ]
                    dump_correlations(result, temp_fh, mode)

        sort_and_write(temp_fh.name, output_file, mode)
    finally:
        # Best-effort cleanup of the scratch file; sort_and_write is expected
        # to be done with it (or to have moved it) by the time it returns.
        try:
            os.remove(temp_fh.name)
        except OSError:
            pass

    return 0
Example #2
0
    def __init__(self,
                 update_data_func=None,
                 state_transition_func=None,
                 thermostat=False):
        """Set up the callbacks and the shared packet fields for the roaster.

        The update data function is called when a packet is opened. The state
        transition function is used by the timer thread to know what to do
        next. See wiki for more information on packet structure and fields.
        When ``thermostat`` is exactly ``True`` a separate process running
        ``self.thermostat`` is started.
        """
        self.update_data_func = update_data_func
        self.state_transition_func = state_transition_func

        # Raw packet fields kept in shared char arrays.
        byte_fields = (
            ('_header', b'\xAA\xAA'),
            ('_temp_unit', b'\x61\x74'),
            ('_flags', b'\x63'),
            ('_current_state', b'\x02\x01'),
        )
        for attr_name, raw in byte_fields:
            setattr(self, attr_name, sharedctypes.Array('c', raw))
        self._footer = b'\xAA\xFA'

        # Integer settings and readings kept in shared values.
        int_fields = (
            ('_fan_speed', 1),
            ('_heat_setting', 0),
            ('_target_temp', 150),
            ('_current_temp', 150),
            ('_time_remaining', 0),
            ('_total_time', 0),
            ('_cont', 1),
        )
        for attr_name, initial in int_fields:
            setattr(self, attr_name, sharedctypes.Value('i', initial))

        if thermostat is True:
            self.thermostat_process = mp.Process(target=self.thermostat)
            self.thermostat_process.start()
def main():
    """Time worker processes that operate on matrices held in shared memory.

    A random (n, m) input matrix and a random (n, n) output matrix are copied
    into shared ctypes arrays, ``concurrency`` Worker processes are started,
    one ``(index, start, stop)`` task per worker is queued, and the largest
    value received on the result queue is printed.
    """
    task_queue = Queue()
    result_queue = Queue()

    # input data
    n = 10000
    m = 100
    X_in = np.random.rand(n, m)

    size_in = X_in.size
    shape_in = X_in.shape
    X_in.shape = size_in  # flatten in place so it can initialise the array

    X_ctypes_in = sharedctypes.RawArray(ctypes.c_double, X_in)
    # Rebind X_in to a numpy view on the shared buffer, restored to 2-D.
    X_in = np.frombuffer(X_ctypes_in, dtype=np.float64, count=size_in)
    X_in.shape = shape_in

    # output data
    X_out = np.random.rand(n, n)

    size_out = X_out.size
    shape_out = X_out.shape
    X_out.shape = size_out

    X_ctypes_out = sharedctypes.Array(ctypes.c_double, X_out)

    # create workers and start them
    concurrency = 4
    # Tie the chunk size to the number of workers instead of a literal 4;
    # floor division keeps the bounds integral on Python 2 and 3 alike.
    unit = n // concurrency
    workers = []
    for _ in range(concurrency):
        worker = Worker(task_queue, result_queue,
                        X_ctypes_in, shape_in,
                        X_ctypes_out, shape_out)
        worker.start()
        workers.append(worker)

    # put one task per worker: (task index, first row, one past last row)
    for i in range(concurrency):
        task_queue.put((i, unit * i, unit * i + unit))

    # get one result per task; each get() blocks until a result arrives
    elapsed_times = [result_queue.get() for _ in range(concurrency)]

    # stop workers
    for worker in workers:
        worker.terminate()

    # do math for elapsed time
    print("Elapsed time {} [s]".format(np.max(elapsed_times)))
Example #4
0
def test_with_shared(N, M, complexity):
    x = ones(N)
    x_sct = sharedctypes.Array(ctypes.c_double, x, lock=False)
    A = 1 + arange(M)

    #pool = multiprocessing.Pool()
    start = time.time()
    processes = [
        multiprocessing.Process(target=test_with_shared_func,
                                args=(x_sct, a, complexity)) for a in A
    ]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    end = time.time()
    t_multiprocessing = end - start

    results = []
    args = [(x, a, complexity) for a in A]
    start = time.time()
    for x, a, complexity in args:
        results.append(test_with_shared_func(x, a, complexity))
    end = time.time()
    t_single = end - start

    print 'Test with shared'
    print '----------------'
    print 'Single:', t_single
    print 'Multiprocessing:', t_multiprocessing
    print 'S/M:', t_single / t_multiprocessing
Example #5
0
def parallel_implementation_corr(logcounts,
                                 sigma,
                                 ranks,
                                 to_compute,
                                 threads=1):
    """
    Create shared variables and a pool for multiprocessing.

    Params:
        logcounts, sigma, ranks: numpy arrays copied into shared memory and
            handed to every worker through ``_init_parallel``.
        to_compute: list of items passed one by one to
            ``parallel_correlations``.
        threads: number of worker processes to use.
    Return: the per-item results of ``parallel_correlations`` flattened into
    a single list.
    """
    # Create ctypes objects from the numpy arrays and copy them into shared
    # memory. lock=False: access to the shared arrays is not synchronized.
    tmp_logcounts = np.ctypeslib.as_ctypes(logcounts)
    shared_logcounts = sharedctypes.Array(tmp_logcounts._type_,
                                          tmp_logcounts,
                                          lock=False)

    tmp_sigma = np.ctypeslib.as_ctypes(sigma)
    shared_sigma = sharedctypes.Array(tmp_sigma._type_, tmp_sigma, lock=False)

    tmp_ranks = np.ctypeslib.as_ctypes(ranks)
    shared_ranks = sharedctypes.Array(tmp_ranks._type_, tmp_ranks, lock=False)

    # Each worker process calls _init_parallel(*initargs) when it starts.
    pool = Pool(processes=threads,
                initializer=_init_parallel,
                initargs=(
                    shared_logcounts,
                    shared_sigma,
                    shared_ranks,
                ))
    try:
        # map supports only one iterable argument and blocks until the
        # result is ready.
        result = pool.map(parallel_correlations, to_compute)
    finally:
        # Always release the worker processes, even if a task raised.
        pool.close()
        pool.join()

    return [item for sublist in result for item in sublist]
Example #6
0
def np_to_c(np_array):
    """
    Copies a numpy array into a ctypes array allocated from shared memory so
    that it can be shared across processes.

    Non-contiguous input (for example a slice with a step or a transposed
    view) is first copied to a C-contiguous array, because
    ``np.ctypeslib.as_ctypes`` rejects strided arrays.

    :param np_array: numpy array to share
    :return: ctype array (unlocked) holding a copy of the data
    """
    # No copy is made here when the input is already C-contiguous.
    contiguous = np.ascontiguousarray(np_array)
    tmp = np.ctypeslib.as_ctypes(contiguous)
    c_array = sharedctypes.Array(tmp._type_, tmp, lock=False)
    return c_array
Example #7
0
def main():
    """run radio display"""
    # NOTE: the docstring above doubles as the command-line description
    # (it is passed to ArgumentParser below), so it is kept short.

    parser = argparse.ArgumentParser(description=main.__doc__)
    parser.add_argument('--color', default='black')
    subparsers = parser.add_subparsers(dest='subparser')
    # Without a sub-command the serial branch below would fail on the missing
    # args.device; make argparse reject that invocation with a usage error.
    subparsers.required = True

    serial_parser = subparsers.add_parser('serial')
    serial_parser.add_argument('--device', default='/dev/ttyS0')
    serial_parser.add_argument('--timeout', default=10, type=float)
    serial_parser.add_argument('--baud', default=115200, type=int)

    subparsers.add_parser('keyboard')
    args = parser.parse_args()

    # Two channel records in a lock-protected shared array; each record is
    # initialised with four placeholder fields followed by six False flags.
    channel_models = sharedctypes.Array(models.Channel, [
        ('', 0, '---.-', '!----------') + 6 * (False, ),
        ('', 0, '---.-', '!----------') + 6 * (False, ),
    ],
                                        lock=True)

    width, height = 480, 320
    window = pyglet.window.Window(width=width, height=height)
    channel_views, bg, batch = views.build(color=args.color)

    @window.event
    def on_draw():
        # Refresh every view from its model, then draw the whole batch.
        window.clear()
        for view, model in zip(channel_views, channel_models):
            view(model)
        batch.draw()

    def update(dt):
        for view in channel_views:
            view.scroll()

    pyglet.clock.schedule_interval(update, 1.0 / 25)
    stop = threading.Event()

    if args.subparser == 'keyboard':
        controller = controllers.KeyboardInput(channel_models, stop)
    else:
        controller = controllers.SerialInput(channel_models,
                                             stop,
                                             port=args.device,
                                             baudrate=args.baud,
                                             timeout=args.timeout)

    t = threading.Thread(target=controller)
    t.start()

    try:
        pyglet.app.run()
    finally:
        # Signal the controller thread and wait for it even when the event
        # loop exits with an exception; otherwise the non-daemon thread
        # would keep the process alive.
        stop.set()
        t.join()
Example #8
0
    def __init__(self, calculator, nombreThreads):
        """Copy the calculator's data into shared memory and split the work.

        The point matrix and the centroid matrix are stored row-major in
        shared arrays of doubles and exposed as numpy views; a managed dict
        holds one (initially empty) cluster list per centroid; ``indexes``
        holds one inclusive ``(first, last)`` row range per thread.
        """
        self.calculator = calculator
        self.nombreThreads = nombreThreads
        self.nombrePoints = self.calculator.nbPoints
        self.nombreCentroides = len(self.calculator.centroides)

        n_points = self.nombrePoints
        n_centroids = self.nombreCentroides

        # Objects shared between the threads: the point matrix, flattened
        # into a single row.
        self.sharedArrayPoints = sharedctypes.Array('d', n_points * n_points)

        # Dictionary holding the clusters, one empty list per centroid.
        self.manager = Manager()
        self.dictClusters = self.manager.dict()
        for cluster_id in range(n_centroids):
            self.dictClusters[cluster_id] = []

        # The centroid matrix, flattened into a single row.
        self.sharedArrayCentroides = sharedctypes.Array(
            'd', n_centroids * n_points)

        # Centroid matrix: numpy view on the shared buffer, filled row by row.
        flat_centroids = np.frombuffer(self.sharedArrayCentroides.get_obj())
        self.matrCentroides = flat_centroids.reshape((n_centroids, n_points))
        for row in range(n_centroids):
            self.matrCentroides[row] = self.calculator.centroides[row]

        # Point matrix: numpy view on the shared buffer, filled row by row.
        flat_points = np.frombuffer(self.sharedArrayPoints.get_obj())
        self.matrPoints = flat_points.reshape((n_points, n_points))
        for row in range(n_points):
            self.matrPoints[row] = self.calculator.matrice[row]

        # Row ranges for the threads; the last thread also takes the
        # remainder of the division.
        self.nombreEntreesParThread = n_points // self.nombreThreads
        chunk = self.nombreEntreesParThread
        final_thread = self.nombreThreads - 1
        self.indexes = [
            (k * chunk,
             n_points - 1 if k == final_thread else (k + 1) * chunk - 1)
            for k in range(self.nombreThreads)
        ]
Example #9
0
def make_common_item(v):
    """Return ``(item, shape)`` where item is a shared copy of v if possible.

    float64 and int32 arrays are flattened and copied into an unlocked shared
    ctypes array; arrays of any other dtype are returned unchanged. ``shape``
    is always the original shape of ``v``.
    """
    original_shape = v.shape
    ctype_by_dtype = {
        numpy.dtype(numpy.float64): ctypes.c_double,
        numpy.dtype(numpy.int32): ctypes.c_int,
    }
    ctype = ctype_by_dtype.get(v.dtype)
    if ctype is None:
        # No matching ctypes element type: hand the array back as it is.
        return v, original_shape
    shared = sharedctypes.Array(ctype, v.flatten(), lock=False)
    return shared, original_shape
Example #10
0
    def processes_start(self):
        """Allocate the shared buffers and start the worker processes.

        Three unlocked shared arrays sized like the current readback
        reference are created and wrapped in numpy arrays. The names in
        PSSOFB.BBBNAMES are then split into ``self._nr_procs`` contiguous
        slices and one daemon process is started per slice.
        """
        # Create shared memory objects to be shared with worker processes.
        template = self._sofb_current_readback_ref
        shape = template.shape
        nelem = template.size

        rbref = _shm.Array(_shm.ctypes.c_double, nelem, lock=False)
        self._sofb_current_readback_ref = _np.ndarray(
            shape, dtype=template.dtype, buffer=memoryview(rbref))

        ref = _shm.Array(_shm.ctypes.c_double, nelem, lock=False)
        self._sofb_current_refmon = _np.ndarray(
            shape, dtype=template.dtype, buffer=memoryview(ref))

        fret = _shm.Array(_shm.ctypes.c_int, nelem, lock=False)
        self._sofb_func_return = _np.ndarray(
            shape, dtype=_np.int32, buffer=memoryview(fret))

        # Unit converter.
        self.converter = UnitConverter(self._sofb_psnames)

        # Subdivide the name list for the processes: the first `extra`
        # slices receive one additional name each.
        quot, extra = divmod(len(PSSOFB.BBBNAMES), self._nr_procs)
        bounds = [
            quot * k + min(k, extra) for k in range(self._nr_procs + 1)
        ]
        for lower, upper in zip(bounds[:-1], bounds[1:]):
            bbbnames = PSSOFB.BBBNAMES[lower:upper]
            done_evt = _Event()
            done_evt.set()
            # One-way pipe: the first end goes to the worker, the second end
            # is kept here.
            theirs, mine = _Pipe(duplex=False)
            proc = _Process(
                target=PSSOFB._run_process,
                args=(self._ethbridge_cls, bbbnames, theirs, done_evt,
                      shape, rbref, ref, fret),
                daemon=True)
            proc.start()
            self._procs.append(proc)
            self._doneevts.append(done_evt)
            self._pipes.append(mine)
Example #11
0
    def create_diamond_square_map(self, low_val=0, high_val=100):
        """
        Build a 2d grid of values with the DiamondSquare algorithm.

        The four corners are seeded at random, the first step is run in this
        process and the four resulting quadrants are then processed by a pool
        of four workers operating on one shared array. Because of the
        algorithm's inherent bias towards the initial values, expect each
        part of the grid to lean towards the value in its corner.

        :param low_val: minimum value that should appear in the resulting grid
        :param high_val: maximum value that should appear in the resulting grid
        :return: 2d grid of procedurally generated values
        """
        mid_value = math.floor((low_val + high_val) / 2)
        half_of_mid = mid_value / 2
        # Candidate corner seeds: the middle value and the upper value are
        # listed twice, the lower value once.
        seed_options = ([mid_value] * 2 + [half_of_mid] +
                        [mid_value + half_of_mid] * 2)

        # initialize grid with corners
        left = top = 0
        right = bottom = self.size - 1
        x_center = math.floor((left + right) / 2)
        y_center = math.floor((top + bottom) / 2)

        init_grid = np.zeros((self.size, self.size))
        corners = ((top, left), (top, right), (bottom, left),
                   (bottom, right))
        for row, col in corners:
            init_grid[row, col] = random.choice(seed_options)

        flat_ctypes = np.ctypeslib.as_ctypes(init_grid.ravel())
        shared_array = sharedctypes.Array(flat_ctypes._type_,
                                          flat_ctypes,
                                          lock=False)

        # One work item per quadrant produced by the first step.
        quadrants = [
            (left, top, x_center, y_center, mid_value),
            (left, y_center, x_center, bottom, mid_value),
            (x_center, top, right, y_center, mid_value),
            (x_center, y_center, right, bottom, mid_value),
        ]

        # Splitting the first recursion between four processes gave a nice
        # speed boost; anything more than that gave diminishing returns.
        with Pool(processes=4, initializer=mp_init,
                  initargs=(shared_array, )) as pool:
            # do first step
            self._diamond_square(left, top, right, bottom, mid_value,
                                 shared_array)
            pool.map(self._ds_recurse, quadrants)

        return pu.convert_to_np(shared_array, self.size)
Example #12
0
def main(size=int(10 * 1e6), max_epoch=150, batch_size=128,
         n_samples=int(10 * 10000)):
    """Measure the cost of repeatedly taking the lock of a shared array.

    A lock-protected shared float array of ``size`` elements is allocated and
    its lock is acquired and released once per simulated training iteration
    (``max_epoch`` epochs of ``n_samples // batch_size`` iterations). The
    elapsed wall-clock time is printed. The defaults reproduce the original
    fixed configuration.
    """
    array = sharedctypes.Array("f", size)
    # Floor division: range() needs an int, and "/" yields a float on
    # Python 3.
    iter_per_epoch = n_samples // batch_size
    n_iters = max_epoch * iter_per_epoch

    st = time.time()
    for _ in range(n_iters):
        with array.get_lock():
            pass
    et = time.time() - st
    print("ElapsedTime:{} [s]".format(et))
Example #13
0
def make_common_item(v):
    """Return ``(item, shape)`` for sharing ``v`` between processes.

    Non-array values are returned unchanged with ``shape`` set to None.
    float64 and int32 numpy arrays are flattened and copied into an unlocked
    shared ctypes array; other arrays are returned unchanged. For arrays,
    ``shape`` is the original shape.
    """
    if not isinstance(v, numpy.ndarray):
        # shape = None means that v is not an array and should not be
        # converted back as numpy item
        return v, None

    shape = v.shape
    ctype = {
        numpy.dtype(numpy.float64): ctypes.c_double,
        numpy.dtype(numpy.int32): ctypes.c_int,
    }.get(v.dtype)
    if ctype is not None:
        log_debug('converting numpy array to common array')
        v = sharedctypes.Array(ctype, v.flatten(), lock=False)
    return v, shape
Example #14
0
    def __init__(self, sync_timer, event_trigger=(), event_ignore_tag=None):
        """Initialize UDPConnectionProcess

        Creates the receive and send queues, the state events and the shared
        buffer for the peer IP address, and registers ``self.stop`` to run at
        interpreter exit.

        Parameters
        ----------
        sync_timer : Clock
            the internal clock for timestamps will be synchronized with this
            clock

        event_trigger: multiprocessing.Event() (or list of..)
            event trigger(s) to be set, if a Udp event is received and it is
            not a command to set this event (typical of sensor recording
            processes). A single event is wrapped in a one-element tuple;
            None is treated as "no triggers".

        event_ignore_tag:
            udp data that start with this tag will be ignored for event
            triggering

        """

        super(UDPConnectionProcess, self).__init__()
        self._sync_timer = sync_timer
        self.receive_queue = Queue()
        self.send_queue = Queue()
        self.event_is_connected = Event()
        self._event_stop_request = Event()
        self._event_is_polling = Event()
        # Bytes literal: a 'c' array cannot be initialised from a text string
        # on Python 3 (on Python 2 both literals are the same type).
        self._ip_address = sharedctypes.Array('c', b'xxx.xxx.xxx.xxx')
        self._event_ignore_tag = event_ignore_tag

        if event_trigger is None:
            self._event_trigger = ()
        else:
            try:
                self._event_trigger = tuple(event_trigger)
            except TypeError:
                # Not iterable: a single trigger object was passed.
                self._event_trigger = (event_trigger, )

        atexit.register(self.stop)
Example #15
0
    def __init__(self, image_dir, short_edge, crop_edge, filenames, randoms,
                 train):
        """Store the dataset settings and locate the class boundaries.

        ``filenames`` must be grouped by class, in the order of
        ``self.classes``, with at least one file per class: each file name
        has to start with the name of the current class or of the next one.
        ``self._class_seps`` receives the index of the first file of every
        class except the first.

        When ``train`` is true, ``randoms`` is flattened to uint32 and copied
        into a shared array; otherwise ``self._randoms`` is not set.

        Raises ValueError if ``short_edge < crop_edge`` or if ``filenames``
        does not match ``self.classes``.
        """
        if short_edge < crop_edge:
            raise ValueError(
                'short_edge (%r) must not be smaller than crop_edge (%r)' %
                (short_edge, crop_edge))
        self._image_dir = image_dir
        self._short_edge = short_edge
        self._crop_edge = crop_edge
        self._filenames = filenames
        self._train = train
        if train:
            randoms = np.asarray(randoms, dtype=np.uint32).ravel()
            self._randoms = sharedctypes.Array(ctypes.c_uint32, randoms)

        n_classes = len(self.classes)
        if n_classes == 0:
            raise ValueError('no classes are defined')

        self._class_seps = []
        t = 0
        for i, filename in enumerate(filenames):
            if not filename.startswith(self.classes[t]):
                t += 1
                # Checking t before indexing keeps malformed input on the
                # ValueError path instead of surfacing as an IndexError.
                if (i == 0 or t >= n_classes
                        or not filename.startswith(self.classes[t])):
                    raise ValueError(
                        'file %r (index %d) does not belong to the expected '
                        'class' % (filename, i))
                self._class_seps.append(i)
        if t != n_classes - 1:
            raise ValueError(
                'files found for %d of %d classes' % (t + 1, n_classes))
Example #16
0
def create_share_type(np_array):
    """Copy a numpy array into an unlocked ctypes array in shared memory.

    Non-contiguous input (for example a transposed view or a slice with a
    step) is first copied to a C-contiguous array, because
    ``np.ctypeslib.as_ctypes`` rejects strided arrays. The returned array has
    the same nesting as the input's shape.
    """
    # No copy is made here when the input is already C-contiguous.
    contiguous = np.ascontiguousarray(np_array)
    np_carr = np.ctypeslib.as_ctypes(contiguous)
    shared_array = sharedctypes.Array(np_carr._type_, np_carr, lock=False)
    return shared_array
Example #17
0
def compute_similarity_wem(n_iter, n_cores, n_locks, data_type, n_samples,
                           data, ground_truth_file, temp_folder, output_folder,
                           annotation_params, classification_params, verbose,
                           debug, with_unique_occurrences, preclustering):
    """
    Computes the similarity matrix with EM-estimated weights.

    One worker process per iteration (``thread_step``) writes into a set of
    lock-protected shared arrays holding, for every pair of samples, one
    integer per iteration. At most ``n_cores`` workers run at a time. The
    per-pair sequences are then read back as integers, the EM parameters are
    estimated with ``estimate_parameters_em`` and the resulting scores are
    written to the upper triangle of the returned matrix.

    Args:
     * ``n_iter`` (*int*): number of iterations.
     * ``n_cores`` (*int*): number of cores to use.
     * ``n_locks`` (*int*): number of locks to use on the full shared matrix (-1 selects ``2 * n_samples``).
     * ``data_type`` (*str*): data set to use.
     * ``n_samples`` (*int*): number of samples.
     * ``data`` (*struct*): initial data.
     * ``ground_truth_file`` (*str*): path to ground-truth.
     * ``temp_folder`` (*str*): path to the temporary folder.
     * ``output_folder`` (*str*): path to the output folder.
     * ``annotation_params`` (*list*): parameters for synthetic annotation.
     * ``classification_params`` (*list*): parameters for classification.
     * ``verbose`` (*int*): sets the verbosity level.
     * ``debug``: forwarded unchanged to ``thread_step``.
     * ``with_unique_occurrences`` (*bool*): indicates whether samples occur only once in the data set or not.
     * ``preclustering``: forwarded unchanged to ``thread_step``.

    Returns:
     * ``n_samples_occurrences`` (*ndarray*): sum over all iterations of the counts reported by the workers.
     * ``synthetic_labels`` (*list*): synthetic labels repartition for each iteration.
     * ``co_occ`` (*ndarray*): full similarity matrix (upper triangle filled).
     * ``None``: fourth element of the returned tuple.
    """

    # Additional variables for estimating the different parameters
    positive_pairs = ground_truth_pairs(data_type, ground_truth_file,
                                        n_samples)
    true_pi0 = 1.0 - float(2 * len(positive_pairs)) / (n_samples *
                                                       (n_samples - 1))
    true_p1 = [0] * n_iter
    true_p0 = [0] * n_iter
    indep_p0 = [0] * n_iter

    #----------------------------------------------------------- SHARED MEMORY
    # Create shared array with ctype structure
    print '> Creating Shared Arrays'
    n_locks = n_samples * 2 if n_locks == -1 else n_locks
    # One entry per unordered pair of samples.
    total_length = n_samples * (n_samples - 1) / 2
    cell_length, rst = total_length / n_locks, total_length % n_locks

    # Add as many extra arrays as needed to hold the remainder.
    # NOTE(review): cell_length is 0 when total_length < n_locks, which would
    # make the division below fail - confirm callers never hit that case.
    if rst > 0:
        n_locks += rst / cell_length
        rst = rst % cell_length
        if rst > 0:
            n_locks += 1

    # An object of type Cell contains the sequence of iteration scores for one pair of samples
    UnitcellType = n_iter * c_int
    # n_locks independently locked arrays of cell_length cells; every score
    # starts at 2.
    shared_coocc = [
        sharedctypes.Array(UnitcellType, [
            UnitcellType(*(2 for _ in xrange(n_iter)))
            for _ in xrange(cell_length)
        ],
                           lock=True) for x in xrange(n_locks)
    ]

    #----------------------------------------------------------- INITIAL WORKER PROCESSES
    threads = {}
    sim_queue = Queue()
    iterations = range(n_iter)
    n_samples_occurrences = np.zeros(n_samples)
    synthetic_labels = []
    n_iterations_done = 0

    # Start initial processes
    # NOTE(review): iterations.pop(0) raises if n_cores > n_iter - confirm
    # callers guarantee n_cores <= n_iter.
    for c in xrange(n_cores):
        t = Process(name='Worker %d' % c,
                    target=thread_step,
                    args=(iterations.pop(0), shared_coocc, cell_length,
                          n_samples, sim_queue, data, temp_folder, data_type,
                          annotation_params, classification_params, verbose,
                          debug, with_unique_occurrences, preclustering))
        threads[t.name] = t
        t.start()

    #----------------------------------------------------------- THREADED ADDITIONAL COMPUTATIONS
    ############################################################################################
    def aux_computation_WEM(incoming_queue, n_iter, shared_coocc, total_length,
                            positive_pairs, true_p1, true_p0, indep_p0):
        """
        Additional computations for the EM similarity.

        Consumes one message per iteration from ``incoming_queue`` and stores
        the independent p0 estimate for that step in ``indep_p0``.
        """

        for k in xrange(n_iter):
            (iter, id, n_steps, counts, synth_lab) = incoming_queue.get()

            # Compute Indep p0: sum of squares over the squared sum of the
            # last label repartition
            repartition = synth_lab[-1]
            indep_p0[n_steps] = float(sum([x**2 for x in repartition
                                           ])) / sum(repartition)**2

            print 'Computation %d done' % (iter + 1)

    ############################################################################################

    incoming_queue = Queue_thread()
    aux_thread = threading.Thread(target=aux_computation_WEM,
                                  args=(incoming_queue, n_iter, shared_coocc,
                                        total_length, positive_pairs, true_p1,
                                        true_p0, indep_p0))
    aux_thread.start()

    # Retrieve results from queue and restart processes if needed
    for k in xrange(n_iter):
        #---- Retrieve and restart
        (id, n_steps, counts, synth_lab, _, b) = sim_queue.get()
        threads[id].join()

        # Hand the result to the auxiliary computation thread
        incoming_queue.put((k, id, n_steps, counts, synth_lab))

        # Start next process, reusing the name of the worker that just ended
        if len(iterations) > 0:
            t = Process(name=id,
                        target=thread_step,
                        args=(iterations.pop(0), shared_coocc, cell_length,
                              n_samples, sim_queue, data, temp_folder,
                              data_type, annotation_params,
                              classification_params, verbose, debug,
                              with_unique_occurrences, preclustering))
            threads[id] = t
            t.start()
        else:
            del threads[id]

        #---- Additional Computations
        n_samples_occurrences += counts
        synthetic_labels.append(synth_lab)

        #----- Verbose outputs
        if verbose >= 1:
            n_iterations_done += 1
            print >> sys.stderr, '\n>>>> %d/%d iterations done\n' % (
                n_iterations_done, n_iter)

    # Join remaining processes (normally, they are processes that crashed)
    for i, t in enumerate(threads.values()):
        t.join()
        if verbose >= 1:
            print 'joined thread %d' % i

    aux_thread.join()
    gc.collect()

    #------------------------------------------------------------------------- POST PROCESS MATRIX
    ####### Reshaping the result into a true numpy matrix
    print '> Reshaping co_occurence matrix'
    print 'Long format'
    # Each cell is collapsed into one integer whose decimal digits are the
    # per-iteration scores; only the first total_length cells are kept.
    flat_cooc = np.zeros(total_length, dtype='object')
    i = 0
    for x in shared_coocc:
        for y in x:
            if i >= total_length:
                break
            flat_cooc[i] = long(''.join([str(z) for z in y]))
            i += 1
    del shared_coocc

    # True parameters
    print 'Compute ground-truth paramters'
    for n_steps in xrange(n_iter):
        # Extract the digit of iteration n_steps from every pair (zfill
        # restores the leading zeros dropped by the integer encoding).
        to_iter = np.vectorize(lambda x: int(str(x).zfill(n_iter)[n_steps]))
        mat = to_iter(flat_cooc)
        # Fraction of positive / non-positive pairs whose score is 1.
        true_p1[n_steps] = float(len(
            np.where(mat[positive_pairs] == 1)[0])) / len(positive_pairs)
        true_p0[n_steps] = float(
            len(
                np.where(mat[np.setdiff1d(range(total_length), positive_pairs)]
                         == 1)[0])) / (total_length - len(positive_pairs))
        del mat

    ###### EM Estimation
    print 'Running EM'
    # NOTE(review): n_iter is passed both positionally and as n_iter=25;
    # whether this is valid depends on the name of the second parameter of
    # estimate_parameters_em - confirm against its signature.
    z1, pi0, p0, p1 = estimate_parameters_em(flat_cooc,
                                             n_iter,
                                             p1i=0.9,
                                             p0i=0.1,
                                             pi0i=0.8,
                                             n_iter=25,
                                             cores=n_cores)

    # Estimates
    print 'Estimated parameters (pi0, p0, p1) %s \n %s \n %s' % (pi0, p0, p1)
    print 'True parameters (pi0, p0, p1) %s \n %s \n %s' % (true_pi0, true_p0,
                                                            true_p1)
    print 'Independant parameters p0  \n  %s \n ' % (indep_p0)
    # Per-iteration weights derived from the estimated p0 (0 where p0 is 0).
    a = [0 if p == 0 else np.sqrt((1 - p) / p) for p in p0]
    b = [0 if p == 0 else -np.sqrt(p / (1 - p)) for p in p0]

    # Similarities
    if verbose >= 2:
        print 'Binary sim'
        # NOTE(review): str.count() is called with the integers 1 and 2, but
        # it expects a string argument - '1' and '2' were probably intended;
        # confirm before relying on this branch.
        to_bin_sim = np.vectorize(lambda x: float(str(long(x)).count(1)) /
                                  float(str(long(x)).count(2)))
        save_coocc(output_folder,
                   to_bin_sim(flat_cooc).astype(np.float),
                   suffix='binary_final_flat')

        print 'WBIN with EM weights'
        # NOTE(review): a[i] / b[i] use the enclosing function's variable i
        # (the counter left over from the flattening loop above), not the
        # position of digit y - confirm the intended index.
        to_wbinem_sim = np.vectorize(lambda x: np.sum([
            a[i] if int(y) == 1 else b[i] if int(y) == 2 else 0
            for y in str(long(x))
        ]) / float((str(long(x)).count(2))))
        save_coocc(output_folder,
                   to_wbinem_sim(flat_cooc).astype(np.float),
                   suffix='weighted_final_flat')

    print 'EM similarities'
    # Look up the EM score of every pair by its zero-padded digit string and
    # store it in the strict upper triangle of the result.
    to_em_sim = np.vectorize(lambda x: z1[str(long(x)).zfill(n_iter)])
    co_occ = np.zeros((n_samples, n_samples), dtype=float)
    co_occ[np.triu_indices(n_samples, k=1)] = to_em_sim(flat_cooc)

    return n_samples_occurrences, synthetic_labels, co_occ, None
def f(mp_arr, mp_it, mp_it2, mp_val, num, N, rr):
    """Apply chunk number ``num`` of the queued modifications to ``mp_arr``.

    ``mp_arr`` is an N x N matrix stored row-major in a flat array. For every
    position k in ``[num * rr, (num + 1) * rr)`` the value ``mp_val[k]`` is
    written to row ``mp_it[k]``, column ``mp_it2[k]``.
    """
    start = num * rr
    stop = (num + 1) * rr
    # Builtin zip instead of itertools.izip, which exists on Python 2 only.
    for (i, j, r) in zip(mp_it[start:stop], mp_it2[start:stop],
                         mp_val[start:stop]):
        mp_arr[i * N + j] = r


if __name__ == '__main__':
    # Problem size: an N x N matrix receiving nbModif modifications that are
    # split into nbThreads chunks of rr modifications each.
    N = 6000
    nbModif = N * 200
    nbThreads = 4
    # Floor division keeps rr an int on Python 3 as well; f() uses it to
    # compute slice bounds, which must be integers.
    rr = nbModif // nbThreads
    lock = mp.Lock()
    pool = []
    # Unlocked shared arrays: the flat matrix, the row and column indices of
    # every modification, and the values to write.
    mp_arr = sct.Array(c.c_double, N * N, lock=False)
    mp_it = sct.Array(c.c_int, nbModif, lock=False)
    mp_it2 = sct.Array(c.c_int, nbModif, lock=False)
    mp_val = sct.Array(c.c_double, nbModif, lock=False)
    listi = np.random.permutation(np.arange(N))
    listj = np.random.permutation(np.arange(N))

    # just random things to assign: modification num targets the cell
    # (listi[n], listj[n]) with value n, where n = num % N
    a = []
    for num in np.arange(nbModif):
        n = num % N
        a.append((listi[n], listj[n], n))
        mp_it[num] = listi[n]
        mp_it2[num] = listj[n]
        mp_val[num] = n
Example #19
0
def balance_ratio(G,
                  length,
                  exact=False,
                  n_samples=100000,
                  accuracy=None,
                  sampling_func=nrsampling,
                  parallel=True):
    """
    Returns balance ratio of "G" based on simple cycles of length up to "length". Please note
    that it catches the Keyboard interruption if "exact" is False. It is helpful when one wants
    to interrupt the algorithm because it is running for longer than expected or the accuracy
    has reached an acceptable value.

    When sampling in parallel, the worker pool and the module-level shared copy of "G"
    are always released, whether sampling finishes, is interrupted or fails.

    Parameters
    ----------
    G : numpy.ndarray
        Adjacency matrix of graph
    length : int
        Maximum length of simple cycles
    exact : bool
        If True, the algorithm counts all subgraphs of G.
        Otherwise, it uses a sampling technique to sample
        enough number of subgraphs so that the estimate of balance
        would converge.
    n_samples : int
        If exact is False, how many samples to take from graph. Use a float number
    accuracy : float
        If provided, it is used as a threshold on standard deviation of estimated ratios. In this
        case, the "n_samples" parameter is ignored and algorithm runs until the desired accuracy
        is reached.
    sampling_func : function
        A sampling function from `cycleindex.sampling` module.
    parallel : bool
        If True, the sampling is done in parallel.
        If "exact" is True, "parallel" is ignored.

    Returns
    -------
    numpy.ndarray
        A numpy array containing balance ratios up to desired length "length".
    """
    global shared_G

    if exact:
        counts = cycle_count(G, length)
        return calc_ratio(*counts)

    # The batch functions accumulate their results into ``counts`` in place.
    counts = ([], [])
    batch_count = batch_count_
    args = (sampling_func, True, counts)
    # Stays None in the serial case so the clean-up code below can tell
    # whether there is a pool to shut down (previously ``pool`` was unbound
    # when ``parallel`` was False and an interrupt arrived).
    pool = None

    if parallel:
        n_cores = cpu_count()

        # A non-None ``strides`` entry means the array is not C-contiguous;
        # make a contiguous copy before exporting it as a ctypes object.
        if G.__array_interface__['strides']:
            G = np.ascontiguousarray(G)

        tmp = np.ctypeslib.as_ctypes(G)
        shared_G = sharedctypes.Array(tmp._type_, tmp, lock=False)
        # The parallel batch function is called with G=None; the matrix is
        # published through the ``shared_G`` global instead.
        G = None
        pool = Pool(n_cores, init_worker_)
        batch_count = batch_count_parallel_
        args = (n_cores, pool, sampling_func, True, counts)

    try:
        if accuracy:
            last_ratios = []
            batch_size = 1000
            # Start at the threshold so the loop body runs at least once.
            deviation = accuracy

            while np.any(deviation >= accuracy):
                batch_count(G, length, batch_size, *args)
                ratios = calc_ratio(*counts)
                last_ratios.append(ratios)
                # The deviation is only re-estimated once more than five
                # ratio estimates are available.
                if len(last_ratios) > 5:
                    deviation = np.std(last_ratios, axis=0)
        else:
            batch_count(G, length, n_samples, *args)
    except KeyboardInterrupt:
        # Deliberate: an interrupt stops sampling early and the ratios are
        # computed from whatever has been counted so far.
        if pool is not None:
            pool.terminate()
    finally:
        # Runs on success, interrupt and error alike, so workers and the
        # shared matrix never outlive the call.
        if pool is not None:
            pool.close()
            pool.join()
            del shared_G

    return calc_ratio(*counts)
Example #20
0
def compute_similarity_basic(
        n_iter, n_cores, n_locks, data_type, n_samples, data, data_occurrences,
        temp_folder, output_folder, annotation_params, classification_params,
        verbose, debug, with_unique_occurrences, preclustering, writing_steps,
        convergence_step, convergence_criterion):
    """
    Computes the simlarity matrix in the basic setting.

    Args:
     * ``n_iter`` (*int*): number of iterations.
     * ``n_cores`` (*int*): number of cores to use.
     * ``n_locks`` (*int*): number of locks to use on the full shared matrix.
     * ``data_type`` (*str*): data set to use.
     * ``n_samples`` (*int*): number of samples.
     * ``data`` (*struct*): initial data.
     * ``data_occurrences`` (*struct*): indicates where each of the samples occurs in the data base.
     * ``temp_folder`` (*str*): path to the temporary folder.
     * ``output_folder`` (*str*): path to the output folder.
     * ``annotation_params`` (*list*): parameters for synthetic annotation.
     * ``classification_params`` (*list*): parameters for classification.
     * ``verbose`` (*int*): sets the verbosity level.
     * ``with_unique_occurrences`` (*bool*): indicates wether samples occur only once in the data set or not.
     * ``with_common_label_wordform`` (*bool*): indicates wether to give the same label to samples with identical wordform or not.
     * ``writing_steps`` (*list*): steps at which to save the partial matrix.
     * ``convergence_step`` (*int*): every 'convergence_step', the script checks if convergence is reached.

    Returns:
     * ``n_samples_occurrences`` (*list*): number of occurrences of each sample in a test set over all iterations.
     * ``synthetic_labels`` (*list*): synthetic labels repartition for each iteration.
     * ``co_occ`` (*ndarray*): full similarity matrix.
     """

    #----------------------------------------------------------- SHARED MEMORY
    # Create shared array
    print '> Creating Shared Array'
    n_locks = n_samples * 2 if n_locks == -1 else n_locks
    total_length = n_samples * (n_samples - 1) / 2
    cell_length, rst = total_length / n_locks, total_length % n_locks

    if rst > 0:
        n_locks += rst / cell_length
        rst = rst % cell_length
        if rst > 0:
            n_locks += 1

    shared_coocc = [
        sharedctypes.Array(np.ctypeslib.ctypes.c_double,
                           np.zeros(cell_length),
                           lock=True) for x in xrange(n_locks)
    ]
    test_occurrences = np.zeros(total_length, dtype=int)

    #----------------------------------------------------------- INITIAL WORKER PROCESSES
    threads = {}
    sim_queue = Queue()
    iterations = range(n_iter)
    n_samples_occurrences = np.zeros(n_samples)
    synthetic_labels = []
    n_iterations_done = 0
    score_penalty = 0.
    convergence_values = []

    # Start initial processes
    for c in xrange(n_cores):
        t = Process(name='Worker %d' % c,
                    target=thread_step,
                    args=(iterations.pop(0), shared_coocc, cell_length,
                          n_samples, sim_queue, data, temp_folder, data_type,
                          annotation_params, classification_params, verbose,
                          debug, with_unique_occurrences, preclustering))
        threads[t.name] = t
        t.start()

    running_threads = len(threads)

    # Compute entropy values
    last_mean_ent = 0.

    #----------------------------------------------------------- THREADED ADDITIONAL COMPUTATIONS
    ############################################################################################
    def aux_computation(incoming_queue, convergence_check_queue, n_iter,
                        n_samples, cell_length, total_length, shared_coocc,
                        data_occurrences, test_occurrences, verbose,
                        convergence_step, writing_steps, last_mean_ent,
                        convergence_criterion, output_folder):
        """
        Additional computations for the BIN and WEIGHT similarity
        """

        for k in xrange(n_iter):
            (iter, id, n_steps, counts, synth_lab, mat) = incoming_queue.get()

            #  Updating number of test occurences
            if mat is not None:
                if data_type == 'NER':
                    mat = np.sum([data_occurrences[x] for x in mat])
                    in_train = np.setdiff1d(range(n_samples), mat.nonzero()[1])
                elif data_type in ['AQUA', 'AUDIO', 'AUDIOTINY']:
                    in_train = np.where(mat == 0)[0]

                test_occurrences += 1
                for i in in_train:
                    test_occurrences[i - 1 + np.cumsum(
                        [n_samples - np.arange(2, i + 1)])] -= 1  # Column
                    test_occurrences[(i + 1 + i * n_samples - (i + 1) *
                                      (i + 2) / 2):(n_samples - 1 +
                                                    i * n_samples - (i + 1) *
                                                    (i + 2) / 2)] -= 1  # Line

            # Convergence check if required
            if convergence_step > 1 and k % convergence_step == 1:
                print 'Checking Convergence'
                entropies = np.zeros(total_length)
                # Lock
                for cell in shared_coocc:
                    cell.get_lock().acquire()

                # Compute entropies
                for v, cell in enumerate(shared_coocc):
                    start_ind = v * cell_length
                    if v == len(shared_coocc) - 1 and rst > 0:
                        sim = np.array(np.frombuffer(
                            cell.get_obj()))[:rst] / np.clip(
                                test_occurrences[start_ind:], 1, n_iter)
                    else:
                        sim = np.array(np.frombuffer(
                            cell.get_obj())) / np.clip(
                                test_occurrences[start_ind:(
                                    start_ind + cell_length)], 1, n_iter)
                    entropies[start_ind:(start_ind + cell_length)] = [
                        -np.sum(st * np.log(st) +
                                (1. - st) * np.log(1. - st)) if
                        (st > 0 and st < 1) else 0. for st in sim
                    ]
                    del sim

                # Release
                for cell in shared_coocc:
                    cell.get_lock().release()

                entropies /= np.log(2)
                mean_ent = np.mean(entropies)
                nonz_mean_ent = np.mean(entropies[entropies != 0])
                del entropies
                if verbose >= 2:
                    print 'Mean Shannon Entropy: %.4f' % mean_ent
                    print '(Non-zero) Mean Shannon Entropy: %.4f' % nonz_mean_ent

                    # Compare to threshold
                    if np.abs(mean_ent -
                              last_mean_ent) < convergence_criterion:
                        print 'convergence reached for criterion %s at step %d' % (
                            convergence_criterion, iter + 1)
                        # Reached convergence: stop execution
                        convergence_check_queue.put(
                            (k, mean_ent, nonz_mean_ent, True))
                        return
                last_mean_ent = mean_ent
                convergence_check_queue.put(
                    (k, mean_ent, nonz_mean_ent, False))

            # Write matrix if required
            if verbose >= 4 and (iter + 1) in writing_steps:
                print 'Saving co-occurrence matrix at step %d' % (iter + 1)
                for cell in shared_coocc:
                    cell.get_lock().acquire()

                np.save(
                    os.path.join(output_folder,
                                 'sim_matrix_%d_partial' % (iter + 1)),
                    (np.hstack(shared_coocc)[:total_length] + score_penalty) /
                    np.clip(test_occurrences, 1, n_iter))

                for cell in shared_coocc:
                    cell.get_lock().release()

            if verbose >= 1:
                print 'Computation %d done' % (iter + 1)

    ############################################################################################
    incoming_queue = Queue_thread()
    convergence_check_queue = Queue_thread()
    aux_thread = threading.Thread(
        target=aux_computation,
        args=(incoming_queue, convergence_check_queue, n_iter, n_samples,
              cell_length, total_length, shared_coocc, data_occurrences,
              test_occurrences, verbose, convergence_step, writing_steps,
              last_mean_ent, convergence_criterion, output_folder))
    aux_thread.start()
    waiting_for_cvg_check = []

    # Retrieve results from queue and restat threads if needed
    for k in xrange(n_iter):
        # Retrieve and restart
        (id, n_steps, counts, synth_lab, mat, b) = sim_queue.get()
        threads[id].join()
        running_threads -= 1

        # Launch Compute thread
        incoming_queue.put((k, id, n_steps, counts, synth_lab, mat))

        # Wait for convergence check before modifying the cooc matrix even further
        if convergence_step > 1 and k % convergence_step == 1:
            print 'Waiting for convergence check'
            a, b, c, d = convergence_check_queue.get()
            convergence_values.append((a, b, c))
            if d:
                print 'Convergence reached, ending program'
                n_samples_occurrences += counts
                synthetic_labels.append(synth_lab)
                score_penalty += b
                iterations[:] = []

                # Terminates other processes + get lock to avoid termination problems
                for cell in shared_coocc:
                    cell.get_lock().acquire()
                for t in threads.values():
                    t.terminate()
                for cell in shared_coocc:
                    cell.get_lock().release()
                break

        # Start next process
        if len(iterations) > 0:
            t = Process(name=id,
                        target=thread_step,
                        args=(iterations.pop(0), shared_coocc, cell_length,
                              n_samples, sim_queue, data, temp_folder,
                              data_type, annotation_params,
                              classification_params, verbose, debug,
                              with_unique_occurrences, preclustering))
            threads[id] = t
            running_threads += 1
            t.start()
        else:
            del threads[id]

        # Additional Computations
        n_samples_occurrences += counts
        synthetic_labels.append(synth_lab)
        score_penalty += b

        # Verbose outputs
        del mat
        if verbose >= 1:
            n_iterations_done += 1
            print >> sys.stderr, '\n>>>> %d/%d iterations done\n' % (
                n_iterations_done, n_iter)

    # Join remaining processes (normally, they are processe that crashed)
    for i, t in enumerate(threads.values()):
        t.join()
        if verbose >= 1:
            print 'joined thread %d' % i
    aux_thread.join()
    gc.collect()

    #------------------------------------------------------------------------- POST PROCESS MATRIX
    # Reshapping the result into a true numpy matrix
    print '> Reshaping co_occurence matrix'
    co_occ = np.zeros((n_samples, n_samples), dtype=float)
    co_occ[np.triu_indices(
        n_samples,
        k=1)] = np.hstack(shared_coocc)[:total_length] + score_penalty
    del shared_coocc

    print 'Normalizing similarities...'
    test_occurrences[test_occurrences == 0] = 1.
    co_occ[np.triu_indices(n_samples, k=1)] /= test_occurrences

    if convergence_step > 0:
        print 'Saving Convergence criterion values'
        with open(os.path.join(output_folder, 'convergence_check'), 'w') as f:
            f.write('Iteration\tMean\tNonZeroMean\n')
            f.write('\n'.join(
                ['\t'.join(map(str, obj)) for obj in convergence_values]))

    return n_samples_occurrences, synthetic_labels, co_occ, test_occurrences
Example #21
0
    classification_params['similarity_type'] = 'BIN'
    map_save_step = 5  # save mAP values every ``step`` iteration
    steps = [
        1, 5, 10, 15, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140, 160,
        180, 210, 230, 250, 270
    ]  # iterations to consider for the correlation comparisons
    with_unique_occurrences = (data_type in ['NER', 'AUDIO', 'AUDIOTINY'])
    with_common_label_wordform = (data_type == 'NER' or data_type == 'AQUA')
    preclustering = index_to_label if (data_type in ['NER', 'AQUA']) else []

    #----------------------------------------------------------- RUN THREADS
    # Initialize shared array
    total_length = n_samples * (n_samples - 1) / 2
    co_occ = [
        sharedctypes.Array(np.ctypeslib.ctypes.c_long,
                           np.zeros(total_length, dtype='int32'),
                           lock=True)
    ]
    count_lab = np.zeros(n_samples)
    synthetic_labels = [0] * n_iter
    compares = {}

    # Sequential loop to build the similarity
    for n in xrange(n_iter):
        # Split + Annotation
        train, test, _ = split_data(n,
                                    data,
                                    data_type,
                                    classification_params['training_size'],
                                    classification_params['classifier_type'],
                                    annotation_params,
    # y = 2048
    y = 256

    num_cpus = 2
    #num_cpus = 4

    a_shape = (x, y)
    b_shape = (y, x)

    # allocate source and dest. arrays
    a = numpy.random.uniform(size=a_shape)
    b = numpy.random.uniform(size=b_shape)
    c = numpy.empty((x, x))

    # allocated shared memory
    shared_a = sharedctypes.Array(ctypes.c_double, a.flat, lock=False)
    shared_b = sharedctypes.Array(ctypes.c_double, b.flat, lock=False)
    shared_c = sharedctypes.Array(ctypes.c_double, c.flat, lock=False)

    # access the answer as a numpy array, set dimensions
    nd_c = ctypeslib.as_array(shared_c).reshape((a_shape[0], b_shape[1]))

    # 1 process reference
    print("starting.")
    t1 = time()
    ans1 = numpy.dot(a, b)
    print("1 CPU:", time() - t1)

    # x must be a multiple of num_cpus
    assert (x % num_cpus == 0)
def modifiedGreedyInsertionCPU(asdGMM, xTilde, y, nPartial=10, speedUp=False, regVal=1e-2, doOpt=True, add2Sigma=1e-3, iterMax=100, relTolLogLike=1e-3,
                        absTolLogLike=1e-3, relTolMSE=1e-3, absTolMSE=1e-3,mseConverged=0., convBounds=None, regValStep=None, addKWARGS=None ):
    """Try splitting each kernel of ``asdGMM`` and return the best resulting model.

    Every column of ``xTilde`` is assigned to the kernel with the largest
    weight. For each kernel that owns enough points, ``nPartial`` random
    pairs of distinct point indices are drawn and handed to
    ``modifiedPartialInsertionCPU``. The candidate with the smallest returned
    value is loaded into a new ``aSymDynamicsGMM``.

    Args:
        asdGMM: model to update; must provide ``nK``, ``nVarTot``,
            ``_getWeightsCPU`` and ``toPars``.
        xTilde: data array; columns are points (indexed as ``xTilde[:, mask]``).
        y: target array, forwarded to the partial insertion routine.
        nPartial: number of random index pairs tried per kernel.
        speedUp: enables a path that is not implemented yet (asserts).
        regVal, doOpt, add2Sigma, iterMax, relTolLogLike, absTolLogLike,
            relTolMSE, absTolMSE, mseConverged: forwarded unchanged to
            ``modifiedPartialInsertionCPU``.
        convBounds: forwarded unchanged; defaults to ``[0.5, 0.3]``.
        regValStep: forwarded unchanged; defaults to ``[0.08, 4]``.
        addKWARGS: extra options forwarded to the partial insertion routine.
            A dict passed by the caller is updated in place (as before);
            when omitted, a fresh dict is created for this call.

    Returns:
        A new ``aSymDynamicsGMM`` built from the best parameter set found.
    """
    # Fresh objects per call: the former mutable defaults were shared across
    # calls, and ``addKWARGS`` is mutated below.
    if convBounds is None:
        convBounds = [0.5, 0.3]
    if regValStep is None:
        regValStep = [0.08, 4]
    if addKWARGS is None:
        addKWARGS = {}

    addKWARGS.setdefault('JacSpace')

    nK = asdGMM.nK
    weights = asdGMM._getWeightsCPU(xTilde)# TBD or based on x only; I don't think so
    indKernel = np.argmax(weights,axis=0)

    if mainDoParallel_:
        # NOTE(review): ``x``, ``relTol`` and ``absTol`` are not defined in
        # this function, so unless they exist at module level this set-up
        # fails; the parallel path below is marked "TBD" anyway.
        xL = nPartial*[x]
        doOptL = nPartial*[doOpt]
        add2SigmaL = nPartial*[add2Sigma]
        iterMaxL = nPartial*[iterMax]
        relTolL = nPartial*[relTol]
        absTolL = nPartial*[absTol]
        mseConvergedL = nPartial*[mseConverged]

    # Now split each of the kernels
    # NOTE(review): the original allocated a sharedctypes.Value here and
    # immediately rebound the name to this float, so a plain float is what
    # has always been forwarded; confirm whether a shared value was intended.
    bestOverall = 1e200
    bestThetaOpt = sharedctypes.Array(ctypes.c_double,varsInMat(y.shape[0])*y.shape[1])
    addKWARGS.update({'bestOverall':bestOverall,'bestThetaOpt':bestThetaOpt})
    resultList = []
    usedClass = asdGMM.__class__
    for k in range(nK):
        indKernelK = indKernel == k
        xTildeK = xTilde[:,indKernelK]
        nPtK = xTildeK.shape[1]
        if xTildeK.shape[1] < asdGMM.nVarTot*4:
            warnings.warn("Skipping kernel {0} in update process due to a lack of points".format(k))
            continue
        if speedUp:
            # Check consistency if assumption that the base of each kernel is disjoint from the others
            parasitInfl = np.mean(weights[np.hstack((np.arange(0,k),np.arange(k+1,nK))),indKernelK])
            if parasitInfl > 0.05:
                warnings.warn("Disjoint base assumption might not be valid for kernel {0}".format(k))
            del parasitInfl

        # Generate random samples; redraw until the two indices of every
        # pair differ.
        randSampI = np.random.choice(nPtK,nPartial)
        randSampJ = np.random.choice(nPtK,nPartial)
        indReplace = randSampI == randSampJ
        while np.any(indReplace):
            randSampI[indReplace] = np.random.choice(nPtK,sum(indReplace))
            randSampJ[indReplace] = np.random.choice(nPtK,sum(indReplace))
            indReplace = randSampI == randSampJ

        if speedUp:
            assert 0,"TBD"
        else:
            if mainDoParallel_:
                assert 0, "TBD cuda driver error and other"
                # Unfinished sketch kept for reference; ``_`` avoids
                # clobbering the kernel index ``k`` on Python 2.
                GMMparsL = [asdGMM.toPars() for _ in range(nPartial)]
                xTildeKL = nPartial*[xTildeK]
                # partialInsertionCPU(GMMpars,x,xK,k,i,j,doOpt=True,add2Sigma=1e-3,iterMax=100,relTol=1e-3,absTol=1e-3)
                with Pool(4) as p:
                    newList = p.starmap(modifiedPartialInsertionCPU,
                                        zip(asdGMMparsL,xTildeL,xTildeKL,y,nPartial*[k],randSampI,randSampJ,doOptL,regValL,add2SigmaL,
                                            iterMaxL,relTolL,absTolL,mseConvergedL))
                resultList += newList
            else:
                resultList += lmap(
                    lambda ij:modifiedPartialInsertionCPU(asdGMM.toPars(),xTilde,xTildeK,y,k,ij[0],ij[1],doOpt=doOpt,regVal=regVal,add2Sigma=add2Sigma,
                                                  iterMax=iterMax,relTolLogLike=relTolLogLike,absTolLogLike=absTolLogLike,relTolMSE=relTolMSE,absTolMSE=absTolMSE,mseConverged=mseConverged, convBounds=convBounds, regValStep=regValStep, addKWARGS=addKWARGS, usedClass=usedClass), zip(randSampI,randSampJ))

    #Get the best updated model among all tested ones
    #Here we care more about mse than loglike
    # ``np.inf`` is the spelling that still exists in NumPy 2.x.
    bestVal = np.inf
    bestPars = None
    for newVal, newPars in resultList:
        if newVal < bestVal:
            bestVal = newVal
            bestPars = newPars

    # Load pars
    newasdGMM = aSymDynamicsGMM(parDict=bestPars)
    return  newasdGMM
Example #24
0
    def __init__(self,
                 update_data_func=None,
                 state_transition_func=None,
                 thermostat=False,
                 kp=0.06, ki=0.0075, kd=0.01,
                 heater_segments=8,
                 ext_sw_heater_drive=False):
        """Set up the shared roaster state and launch the worker processes.

        Creates the variables that are sent in packets to the roaster.
        ``update_data_func`` is called when a packet is opened, and
        ``state_transition_func`` is used by the timer process to decide
        what to do next. See the wiki for the packet structure and fields.
        """
        # Protocol decoding states.
        (self.LOOKING_FOR_HEADER_1,
         self.LOOKING_FOR_HEADER_2,
         self.PACKET_DATA,
         self.LOOKING_FOR_FOOTER_2) = (0, 1, 2, 3)
        # Connection monitoring states.
        (self.CS_NOT_CONNECTED,
         self.CS_ATTEMPTING_CONNECT,
         self.CS_CONNECTING,
         self.CS_CONNECTED) = (-2, -1, 0, 1)
        # Connection attempt types.
        self.CA_NONE, self.CA_AUTO, self.CA_SINGLE_SHOT = (0, 1, 2)

        self._create_update_data_system(update_data_func)
        self._create_state_transition_system(state_transition_func)

        # Byte fields of the packet, held in shared char arrays.
        for field, payload in (('_header', b'\xAA\xAA'),
                               ('_temp_unit', b'\x61\x74'),
                               ('_flags', b'\x63'),
                               ('_current_state', b'\x02\x01')):
            setattr(self, field, sharedctypes.Array('c', payload))
        self._footer = b'\xAA\xFA'

        # Integer settings and flags shared with the worker processes.
        for field, initial in (('_fan_speed', 1),
                               ('_heat_setting', 0),
                               ('_target_temp', 150),
                               ('_current_temp', 150),
                               ('_time_remaining', 0),
                               ('_total_time', 0),
                               ('_disconnect', 0),
                               ('_teardown', 0)):
            setattr(self, field, sharedctypes.Value('i', initial))

        self._cooling_for_pid_control = False

        # Heater level used for the software PWM.
        self._heater_level = sharedctypes.Value('i', 0)

        # The attributes below are NOT process-safe: do not touch them from
        # the comm or timer processes, nor from the callbacks.
        self._ext_sw_heater_drive = ext_sw_heater_drive
        # The thermostat is forced off when the heater is driven externally.
        self._thermostat = False if self._ext_sw_heater_drive else thermostat
        self._pid_kp, self._pid_ki, self._pid_kd = kp, ki, kd
        self._heater_bangbang_segments = heater_segments

        # Start out as 'not connected' ...
        self._connected = sharedctypes.Value('i', 0)
        self._connect_state = sharedctypes.Value('i', self.CS_NOT_CONNECTED)
        # ... and 'not trying to connect'.
        self._attempting_connect = sharedctypes.Value('i', self.CA_NONE)

        # Communication process.
        comm_args = (self._thermostat,
                     self._pid_kp,
                     self._pid_ki,
                     self._pid_kd,
                     self._heater_bangbang_segments,
                     self._ext_sw_heater_drive,
                     self.update_data_event)
        self.comm_process = mp.Process(target=self._comm, args=comm_args)
        self.comm_process.daemon = True
        self.comm_process.start()

        # Timer process that counts down time_remaining.
        timer_args = (self.state_transition_event,)
        self.time_process = mp.Process(target=self._timer, args=timer_args)
        self.time_process.daemon = True
        self.time_process.start()
Example #25
0
 def get_multiprocess_numpy(dtype, shape):
     """Return a zero-filled, lock-free shared ctypes array.

     A NumPy array of zeros with the requested ``dtype`` and ``shape`` is
     converted to a ctypes object, which then serves both as the element
     type source and as the initialiser of the shared array.
     """
     zeros_as_ctypes = np.ctypeslib.as_ctypes(np.zeros(shape, dtype=dtype))
     element_type = zeros_as_ctypes._type_
     # lock=False: the raw shared object is returned, without a lock wrapper.
     return sharedctypes.Array(element_type, zeros_as_ctypes, lock=False)
Example #26
0
def compute_similarity_ova(n_iter,
                           n_cores,
                           n_locks,
                           input_file,
                           ground_truth_file,
                           data_type,
                           n_samples,
                           data,
                           data_occurrences,
                           index_to_label,
                           label_to_index,
                           temp_folder,
                           output_folder,
                           annotation_params,
                           classification_params,
                           verbose,
                           debug,
                           with_unique_occurrences,
                           preclustering,
                           gtonly=True):
    """
    Computes the simlarity matrix in the basic setting.

    Args:
     * ``n_iter`` (*int*): number of iterations.
     * ``n_cores`` (*int*): number of cores to use.
     * ``n_locks`` (*int*): number of locks to use on the full shared matrix.
     * ``data_type`` (*str*): data set to use.
     * ``n_samples`` (*int*): number of samples.
     * ``data`` (*struct*): initial data.
     * ``data_occurrences`` (*struct*): indicates where each of the samples occurs in the data base.
     * ``index_to_label`` (*list*): maps a sample's index to a string representation.
     * ``label_to_index`` (*dict*): reverse index_to_label mapping.
     * ``temp_folder`` (*str*): path to the temporary folder.
     * ``output_folder`` (*str*): path to the output folder.
     * ``annotation_params`` (*list*): parameters for synthetic annotation.
     * ``classification_params`` (*list*): parameters for classification.
     * ``verbose`` (*int*): sets the verbosity level.
     * ``with_unique_occurrences`` (*bool*): indicates wether samples occur only once in the data set or not.
     * ``with_common_label_wordform`` (*bool*): indicates wether to give the ame label to samples with identical wordform or not.
     * ``writing_steps`` (*list*): steps at which to save the partial matrix.
     * ``gt_only`` (*boolean, optional*): if True, experiments will only be conducted from query samples from the ground-truth. Defqults to True.

    Returns:
     * ``n_samples_occurrences`` (*list*): number of occurrences of each sample in a test set over all iterations.
     * ``synthetic_labels`` (*list*): synthetic labels repartition for each iteration.
     * ``co_occ`` (*ndarray*): full similarity matrix.
     """

    # Additional parameters
    full_matrix = np.zeros((n_samples, n_samples))
    n_locks, rst, cell_length = 1, 0, n_samples
    ground_truth = parse_ground_truth(data_type, ground_truth_file,
                                      label_to_index)
    sampleindx = ground_truth_indices(
        data_type, ground_truth_file,
        label_to_index) if gtonly else xrange(n_samples)
    shuffle(sampleindx)

    #################################### LOOP OVER EACH SAMPLE
    for train_sample in sampleindx:

        name = index_to_label[train_sample]
        print "Iteration for sample %d - %s" % (train_sample, name)
        iter_annotation_params = annotation_params.copy()
        iter_annotation_params['sample'] = 'B-%d' % train_sample

        print 'Pre-parsing train sentences'
        data_files = [
            f for f in os.listdir(input_file) if f.endswith('.xml.u8')
        ]
        with open(
                os.path.join(iter_annotation_params['ova_occurrences'],
                             '%s.pkl' % (name)), 'rb') as f:
            train_docs = pickle.load(f)
            train_files = train_docs.keys()

        partial_load_file = partial(
            load_file,
            input_file=input_file,
            train_docs=train_docs,
            label_to_index=label_to_index,
            classification_params=classification_params)
        pool = Pool(processes=n_cores)
        samples = pool.map(partial_load_file, train_files)
        train = ()
        for tl in samples:
            train = chain(train, tl)
        del samples
        iter_annotation_params['ova_occurrences'] = (list(train), train_files)

        # Shared Array
        print 'Creating Shared Array'
        shared_coocc = [
            sharedctypes.Array(np.ctypeslib.ctypes.c_double,
                               np.zeros(n_samples),
                               lock=True)
        ]
        test_occurrences = np.zeros(n_samples, dtype=int)

        #----------------------------------------------------------- INITIAL WORKER PROCESSES
        threads = {}
        sim_queue = Queue()
        iterations = range(n_iter)
        n_samples_occurrences = np.zeros(n_samples)
        synthetic_labels = []
        n_iterations_done = 0
        score_penalty = 0.

        # Start initial processes
        for c in xrange(n_cores):
            t = Process(name='Worker %d' % c,
                        target=thread_step,
                        args=(iterations.pop(0), shared_coocc, cell_length,
                              n_samples, sim_queue, data, temp_folder,
                              data_type, iter_annotation_params,
                              classification_params, verbose, debug,
                              with_unique_occurrences, preclustering))
            threads[t.name] = t
            t.start()

        #----------------------------------------------------------- THREADED ADDITIONAL COMPUTATIONS
        ############################################################################################
        def aux_computation_OVA(incoming_queue, n_iter, test_occurrences,
                                data_occurrences):
            """
            Additional computations for the OVA annotation
            """

            for k in xrange(n_iter):
                (_, _, _, _, _, mat) = incoming_queue.get()

                #Updating number of test occurences
                if mat != None:
                    if data_type == 'NER':  #In NER #test ~ #entities + sparse matrices
                        mat = np.sum([data_occurrences[x] for x in mat])

                    mat[mat != 0] = 1
                    test_occurrences += mat

        ############################################################################################
        incoming_queue = Queue_thread()
        aux_thread = threading.Thread(target=aux_computation_OVA,
                                      args=(
                                          incoming_queue,
                                          n_iter,
                                          test_occurrences,
                                          data_occurrences,
                                      ))
        aux_thread.start()

        #--------- Retrieve results from queue and restat threads if needed
        for k in xrange(n_iter):
            #---- Retrieve and restart
            (id, n_steps, counts, synth_lab, mat, b) = sim_queue.get()
            threads[id].join()

            #Launch Compute thread
            incoming_queue.put((k, id, n_steps, counts, synth_lab, mat))

            #Start next process
            if len(iterations) > 0:
                t = Process(name=id,
                            target=thread_step,
                            args=(iterations.pop(0), shared_coocc, cell_length,
                                  n_samples, sim_queue, data, temp_folder,
                                  data_type, iter_annotation_params,
                                  classification_params, verbose, debug,
                                  with_unique_occurrences, preclustering))
                threads[id] = t
                t.start()
            else:
                del threads[id]

            #---- Additional Computations
            n_samples_occurrences += counts
            synthetic_labels.append(synth_lab)
            score_penalty += b

            #----- Verbose outputs
            del mat
            if verbose >= 1:
                n_iterations_done += 1
                print >> sys.stderr, '\n>>>> %d/%d iterations done\n' % (
                    n_iterations_done, n_iter)

        #------------ Join remaining processes if any
        for i, t in enumerate(threads.values()):
            t.join()
            if verbose >= 1:
                print 'joined thread %d' % i
        aux_thread.join()
        gc.collect()

        with shared_coocc[0].get_lock():
            arr = np.frombuffer(shared_coocc[0].get_obj())

        # Normalizing
        test_occurrences[test_occurrences == 0] = 1.
        co_occ = arr / test_occurrences
        full_matrix[train_sample, :] = arr

        # Output sorted distribution for the current sample if verbosity level high enough
        if verbose >= 2:
            gt = ground_truth[name]
            sorted_ind = np.argsort(co_occ)
            with open(os.path.join(output_folder, '%s_sorted_sim.txt' % name),
                      'w') as f:
                f.write('\n'.join([
                    '%s %s\t%d-%s\t%s\t%s\t%s' %
                    ('>' if index_to_label[i] in gt else '', name, i,
                     index_to_label[i], co_occ[i], arr[i], test_occurrences[i])
                    for i in sorted_ind
                ]))

        # Output mAP for the current sample
        if verbose >= 2:
            print "######## Result for iteration %d - %s" % (train_sample,
                                                             name)
            eval_retrieval(data_type,
                           co_occ,
                           label_to_index,
                           index_to_label,
                           ground_truth_file,
                           output_folder,
                           ova=train_sample,
                           writing=True)

    return n_samples_occurrences, synthetic_labels, arr, None
Example #27
0
import ctypes
from multiprocessing import sharedctypes, Process
import numpy
from numpy import ctypeslib


def f(cta):
    """Overwrite the first element of a shared array with the sum of all elements.

    Args:
        cta: A synchronized ``multiprocessing.sharedctypes`` array (one that
            exposes ``get_obj()`` and ``get_lock()``). It is modified in place.
    """
    # Imported locally so the function is self-contained when it is used as a
    # ``Process`` target.
    from numpy import ctypeslib

    # Summing and then storing is a read-modify-write on shared memory, so
    # hold the array's own lock for the whole operation.
    with cta.get_lock():
        # Use the public accessor instead of the private ``_obj`` attribute.
        # ``as_array`` wraps the ctypes buffer, so the write below lands in
        # the shared array itself.
        npa = ctypeslib.as_array(cta.get_obj())
        npa[0] = npa.sum()


# Shared array of one million doubles initialised to 0.0 .. 999999.0.
# ``lock`` is left at its default, so the returned object is a synchronized
# wrapper around the raw ctypes array.
cta = sharedctypes.Array(ctypes.c_double, numpy.arange(1e6))
# NumPy view over the shared buffer, obtained through the public
# ``get_obj()`` accessor rather than the private ``_obj`` attribute.
npa = ctypeslib.as_array(cta.get_obj())
# Worker that will run ``f`` on the shared array. It is only constructed
# here, not started.
p1 = Process(target=f, args=(cta, ))
Example #28
0
def func0(lock, a):
    """Print and update a shared value while holding ``lock``.

    Args:
        lock: Lock or semaphore guarding the critical section. It must support
            the context-manager protocol, as the ``multiprocessing`` and
            ``threading`` lock and semaphore types do.
        a: Shared object exposing ``.value``; it is passed on to ``func.add1``.
    """
    # ``with`` guarantees the lock is released even when the body raises.
    # With a bare acquire()/release() pair, an exception here would leave the
    # lock held and block every other process waiting on it.
    with lock:
        print(a.value)
        # Presumably equivalent to ``a.value += 1`` -- TODO confirm against
        # func.add1.
        func.add1(a)
        time.sleep(1)
        proc = current_process()
        # Formatted explicitly so the output is the same "name pid" line
        # whether ``print`` is a statement or a function.
        print('%s %s' % (proc.name, proc.pid))


lock0 = Lock()
lock = Semaphore(2)  # Semaphore(1) == Lock()
v = Value('f', 0.0)
a = Array('i', range(10))
sv = sharedctypes.Value('f', 0.0)
sa = sharedctypes.Array('i', range(10))

# func0 reads ``a.value``. A synchronized array of ints has no ``value``
# attribute (only shared values and char arrays do), so the workers must be
# given the shared Value ``sv`` rather than the Array ``sa``.
sub_proc0 = Process(target=func0, args=(lock, sv))
sub_proc1 = Process(target=func0, args=(lock, sv))
sub_proc2 = Process(target=func0, args=(lock, sv))

# Only launch workers when run as a script. With the "spawn" start method
# every child re-imports this module, and unguarded start() calls would try
# to launch processes again from inside the children.
if __name__ == '__main__':
    sub_proc0.start()
    sub_proc1.start()
    sub_proc2.start()

    sub_proc0.join()
    sub_proc1.join()
    sub_proc2.join()

    # Run once more in the parent process after all workers have finished.
    func0(lock, sv)
        conn.poll()
        a, n = conn.recv()
        conn.send(f(x, a, n))

if __name__ == '__main__':
    # Benchmark parameters: length of the shared array, number of worker
    # processes, and the `complexity` value sent along with every task.
    N = 10000000
    numprocesses = 6
    complexity = 10

    print 'Array size:', N
    print 'Num processors:', numprocesses
    print 'Task complexity:', complexity
    print

    # Copy an array of N ones into a shared ctypes array of doubles.
    # lock=False requests the raw array without a synchronization wrapper.
    # (`ones` / `arange` are presumably numpy's -- imports are not visible.)
    x = ones(N)
    x_sct = sharedctypes.Array(ctypes.c_double, x, lock=False)
    # One coefficient per worker: 1, 2, ..., numprocesses.
    A = 1 + arange(numprocesses)

    # One pipe per worker. zip(*pipes) separates the two ends of every pipe:
    # the first ends are kept here, the second ends are handed to the workers.
    pipes = [multiprocessing.Pipe() for _ in xrange(numprocesses)]
    server_conns, client_conns = zip(*pipes)
    processes = [multiprocessing.Process(
                            target=process_listener,
                            args=(x_sct, conn)
                            ) for conn in client_conns]
    for p in processes:
        p.start()

    # Start the clock, then send each worker its (a, complexity) task.
    start = time.time()
    for i, a in enumerate(A):
        server_conns[i].send((a, complexity))
    # NOTE(review): the body of the loop below is cut off in this snippet;
    # presumably it collects the workers' replies from server_conns.
    for i in xrange(numprocesses):
Example #30
0
from numpy import ctypeslib

# Number of elements in the random test arrays (12,582,912).
n_params = 256 * 256 * 3 * 64

def f(q, S):
    """Copy fresh random data into the shared array ``S`` and report via ``q``.

    Args:
        q: Queue-like object; receives a single dict with keys ``"x"``
            (always ``None``) and ``"st"`` (the start timestamp).
        S: Synchronized shared array exposing ``get_obj()``; it is
            overwritten in place.
    """
    # The random payload is generated before any timestamp is taken.
    payload = np.random.rand(n_params)
    shared_view = ctypeslib.as_array(S.get_obj())

    started_at = time.time()
    # Rebinding the name (shared_view = payload) would leave the shared
    # memory untouched; slice assignment copies the data into it.
    copy_started_at = time.time()
    shared_view[:] = payload
    copy_elapsed = time.time() - copy_started_at
    print("SetTime:{}".format(copy_elapsed))

    # Sentinel the parent can read back to confirm the write is visible.
    shared_view[0] = 1000
    q.put({"x": None, "st": started_at})

if __name__ == '__main__':
    # Shared single-precision ("f") array initialised from random data.
    buff = np.random.rand(n_params)
    S = sharedctypes.Array("f", buff)
    q = Queue()
    p = Process(target=f, args=(q, S, ))
    p.start()
    # Blocks until the child has finished writing and posted its timestamp.
    data = q.get()

    print("Parent:{}[s]".format(time.time() - data["st"]))
    x_ = ctypeslib.as_array(S.get_obj())
    print("Shape:{}".format(x_.shape))
    print("init-val:{}, changed-val:{}".format(buff[0], x_[0]))
    # The child's only message has already been received and it has nothing
    # left to do but exit, so wait for it instead of killing it. terminate()
    # on a process that uses a queue risks corrupting the queue and leaves
    # the child unreaped.
    p.join()