def inference(file_path, file_urls):
    result = dict()
    with open("./assets/config.yaml", 'r') as f:  # close the file after reading
        config = yaml.load(f, Loader=yaml.FullLoader)
    pse = InferencePSE(config["pse_evaluation_parameter"], file_path)
    ocr = InferenceOCR(config["ocr_evaluation_parameter"], file_path)
    # Inference PSE
    pse_time = pse.run()
    # Crop image based on PSE output
    # Release the GPU memory
    cuda.select_device(int(config["pse_evaluation_parameter"]["gpu_list"]))
    cuda.close()
    print(file_path)
    CropPSE(file_path)
    # Inference OCR
    ocr_time = ocr.run()
    # Combining results
    # CreateTxt(file_urls)
    for file_name in file_urls:
        result[file_name] = dict()
        txt_file = "./assets/demo/text/" + file_name.replace("jpg", "txt")
        img_file = file_path + file_name
        df, _, _, _ = create_df(txt_file)
        dict_cells, list_infos = create_cells(df)
        result[file_name]['df'] = create_DB(dict_cells, list_infos).drop(
            'idx', axis=1).to_html(header="true")
        # Visualizer
        result[file_name]['img'] = connect_and_save(img_file, dict_cells, list_infos)
    return result

def worker(input_q, output_q):
    # Load a (frozen) TensorFlow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)
    mtcnn = detect_and_align.create_mtcnn(sess, None)

    fps = FPS().start()
    while True:
        fps.update()
        frame = input_q.get()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        face_patches, padded_bounding_boxes, landmarks = detect_and_align.detect_faces(frame_rgb, mtcnn)
        output = dict(face_boxes=padded_bounding_boxes)
        output_q.put(output)

    # Note: unreachable unless a break is added to the loop above.
    fps.stop()
    sess.close()
    cuda.select_device(0)
    cuda.close()

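# A minimal driver sketch for the worker above (an assumption, not part of the
# source): run it in its own process so the cuda.close() at the end tears down
# that process's private CUDA context. Queue sizes and the frame source are
# illustrative.
from multiprocessing import Process, Queue

if __name__ == '__main__':
    input_q = Queue(maxsize=5)
    output_q = Queue(maxsize=5)
    Process(target=worker, args=(input_q, output_q), daemon=True).start()
    # feed frames (e.g. from cv2.VideoCapture) and read detections back:
    # input_q.put(frame); boxes = output_q.get()['face_boxes']
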
def gpu_dmt(cand, device=0):
    """
    :param cand: Candidate object
    :param device: GPU id
    :return:
    """
    cuda.select_device(device)
    chan_freqs = cuda.to_device(np.array(cand.chan_freqs, dtype=np.float32))
    dm_list = cuda.to_device(np.linspace(0, 2 * cand.dm, 256, dtype=np.float32))
    dmt_return = cuda.to_device(np.zeros((256, cand.data.shape[0]), dtype=np.float32))
    cand_data_in = cuda.to_device(np.array(cand.data.T, dtype=np.uint8))

    @cuda.jit
    def gpu_dmt(cand_data_in, chan_freqs, dms, cand_data_out, tsamp):
        ii, jj, kk = cuda.grid(3)
        if ii < cand_data_in.shape[0] and jj < cand_data_in.shape[1] and kk < dms.shape[0]:
            disp_time = int(
                -1 * 4148808.0 * dms[kk] * (1 / (chan_freqs[0]) ** 2 - 1 / (chan_freqs[ii]) ** 2) / 1000 / tsamp)
            cuda.atomic.add(cand_data_out, (kk, jj), cand_data_in[ii, (jj + disp_time) % cand_data_in.shape[1]])

    threadsperblock = (16, 8, 8)
    blockspergrid_x = math.ceil(cand_data_in.shape[0] / threadsperblock[0])
    blockspergrid_y = math.ceil(cand_data_in.shape[1] / threadsperblock[1])
    blockspergrid_z = math.ceil(dm_list.shape[0] / threadsperblock[2])
    blockspergrid = (blockspergrid_x, blockspergrid_y, blockspergrid_z)
    gpu_dmt[blockspergrid, threadsperblock](cand_data_in, chan_freqs, dm_list, dmt_return, float(cand.tsamp))
    cand.dmt = dmt_return.copy_to_host()
    cuda.close()
    return cand

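# A rough usage sketch for gpu_dmt above, not from the original code base:
# Candidate is stood in by a SimpleNamespace carrying only the attributes the
# function touches (data, dm, chan_freqs, tsamp); shapes and values are made up.
import types
import numpy as np

cand = types.SimpleNamespace(
    data=np.random.randint(0, 256, (4096, 336), dtype=np.uint8),  # (time, channels)
    dm=56.7,                                                      # pc cm^-3
    chan_freqs=np.linspace(1465.0, 1129.0, 336),                  # MHz, descending
    tsamp=0.000256,                                               # seconds
)
cand = gpu_dmt(cand)  # fills cand.dmt with the (256 DM trials, n_samples) plane
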
def release_GPU_memory():  # renamed from the original's misspelled `relase_GPU_memory`
    K.clear_session()
    cuda.select_device(0)
    cuda.close()
    config = tf.ConfigProto()
    K.tensorflow_backend.set_session(tf.Session(config=config))

def cuda_select_device(dev_i):
    try:
        cuda.close()
    except Exception as e:
        print(e)
    cuda.select_device(dev_i)

def clear(self):
    K.clear_session()
    gc.collect()
    del self.model
    for gpu in range(len(cuda.gpus)):
        cuda.select_device(gpu)
        cuda.close()

def gpu_dmt_timeseries(dedisp_times, psr_data, max_delay, device=0):
    """
    :param dedisp_times: per-channel, per-DM-trial dispersion delays in samples, shape (n_channels, n_dms)
    :param psr_data: input time-frequency data, shape (n_samples, n_channels)
    :param max_delay: maximum dispersion delay in samples (trims the output time axis)
    :param device: GPU id
    :return: DM-time array
    """
    cuda.select_device(device)
    dm_time = np.zeros((dedisp_times.shape[1], int(psr_data.shape[0] - max_delay)), dtype=np.float32)

    @cuda.jit(fastmath=True)
    def gpu_dmt(cand_data_in, all_delays, cand_data_out):
        ii, jj, kk = cuda.grid(3)
        if ii < cand_data_in.shape[0] and jj < cand_data_out.shape[1] and kk < all_delays.shape[1]:
            cuda.atomic.add(cand_data_out, (kk, jj), cand_data_in[ii, (jj + all_delays[ii, kk])])

    all_delays = cuda.to_device(dedisp_times)
    dmt_return = cuda.device_array(dm_time.shape, dtype=np.float32)
    cand_data_in = cuda.to_device(np.array(psr_data.T, dtype=psr_data.dtype))

    threadsperblock = (4, 8, 32)
    blockspergrid_x = math.ceil(cand_data_in.shape[0] / threadsperblock[0])
    blockspergrid_y = math.ceil(cand_data_in.shape[1] / threadsperblock[1])
    blockspergrid_z = math.ceil(dedisp_times.shape[1] / threadsperblock[2])
    blockspergrid = (blockspergrid_x, blockspergrid_y, blockspergrid_z)
    gpu_dmt[blockspergrid, threadsperblock](cand_data_in, all_delays, dmt_return)
    dm_time = dmt_return.copy_to_host()
    cuda.close()
    return dm_time

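# A hedged sketch of building the dedisp_times table the function above expects:
# integer sample delays of shape (n_channels, n_dm_trials), using the same
# dispersion constant and first-channel reference as the other kernels in this
# collection. make_delay_table is a hypothetical helper, not from the source;
# note the kernel has no modulo wrap, hence the max_delay trim of the output.
import numpy as np

def make_delay_table(chan_freqs, dms, tsamp):
    # delay (in samples) of each channel at each trial DM, relative to channel 0
    rel = -(1 / chan_freqs[0] ** 2 - 1 / chan_freqs[:, None] ** 2) * 4148808.0 / 1000 / tsamp
    return (rel * dms[None, :]).astype(np.int32)

# delays = make_delay_table(freqs, np.linspace(0, 2 * dm, 256), tsamp)
# dmt = gpu_dmt_timeseries(delays, psr_data, max_delay=delays.max())
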
def gpu_dedisperse(cand, device=0):
    """
    :param cand: Candidate object
    :param device: GPU id
    :return:
    """
    cuda.select_device(device)
    chan_freqs = cuda.to_device(np.array(cand.chan_freqs, dtype=np.float32))
    cand_data_in = cuda.to_device(np.array(cand.data.T, dtype=np.uint8))
    cand_data_out = cuda.to_device(np.zeros_like(cand.data.T, dtype=np.uint8))

    @cuda.jit
    def gpu_dedisp(cand_data_in, chan_freqs, dm, cand_data_out, tsamp):
        ii, jj = cuda.grid(2)
        if ii < cand_data_in.shape[0] and jj < cand_data_in.shape[1]:
            disp_time = int(-4148808.0 * dm * (1 / (chan_freqs[0]) ** 2 - 1 / (chan_freqs[ii]) ** 2) / 1000 / tsamp)
            cand_data_out[ii, jj] = cand_data_in[ii, (jj + disp_time) % cand_data_in.shape[1]]

    threadsperblock = (32, 32)
    blockspergrid_x = math.ceil(cand_data_in.shape[0] / threadsperblock[0])
    blockspergrid_y = math.ceil(cand_data_in.shape[1] / threadsperblock[1])
    blockspergrid = (blockspergrid_x, blockspergrid_y)
    gpu_dedisp[blockspergrid, threadsperblock](cand_data_in, chan_freqs, float(cand.dm), cand_data_out, float(cand.tsamp))
    cand.dedispersed = cand_data_out.copy_to_host().T
    cuda.close()
    return cand

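# Usage sketch (assumed) for gpu_dedisperse above, with the same stand-in
# Candidate as earlier; the kernel wraps shifted samples via the modulo, so the
# output keeps cand.data's exact (time, channels) shape.
# cand = gpu_dedisperse(cand, device=0)
# assert cand.dedispersed.shape == cand.data.shape
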
def main():
    data = pd.read_csv(
        'C:/Users/kdan/BigJob12/main_project/_db/data/model_data/working/to_reid.csv')
    image_path = 'C:/Users/kdan/BigJob12/main_project/_db/data/Preprocessed_data/'
    copy_path = 'C:/Users/kdan/BigJob12/main_project/_db/data/model_data/gallery/gallery_list/'

    shutil.rmtree(copy_path, ignore_errors=True)  # tolerate a missing directory
    if not os.path.isdir(copy_path[:-1]):
        os.mkdir(copy_path[:-1])

    start = time.time()  # record the start time
    for image_file_path in data['file_name']:
        try:
            shutil.copy(image_path + image_file_path,
                        copy_path + image_file_path.split('/')[-1])
        except OSError:  # skip files that cannot be copied
            pass

    print(len(data['file_name']))
    print("time :", time.time() - start)

    gc.collect()
    sys.stdout.flush()
    cuda.close()
    torch.cuda.empty_cache()  # release cached GPU memory held by PyTorch

def CREATE_MODEL(self):
    try:
        cuda.select_device(0)
        cuda.close()
    except Exception:
        pass
    try:
        tf.keras.backend.clear_session()
    except Exception:
        pass
    self.outsize = self.dict['OTHERS']['1']['OUT_SIZE']
    self.windowlength = self.dict['OTHERS']['1']['WINDOW_LEN']
    self.MAX_window = self.dict['OTHERS']['1']['WINDOW_LEN']
    self.batch = self.dict['OTHERS']['1']['BATCH_SIZE']
    self.period = self.dict['OTHERS']['1']['PERIOD']
    self.optimizer.learning_rate = self.dict['OTHERS']['1']['LR']
    self.epochz = self.dict['OTHERS']['1']['EPOCHS']
    if self.FIRST_ITER:
        self.CREATE_DATA()
        self.FIRST_ITER = False
    # Rebuild the data unless only LR and/or EPOCHS changed. `not` replaces the
    # original's bitwise `~`, which is always truthy on a bool; the original's
    # lone 'EPOCHs' entry looked like a typo for 'EPOCHS' and is normalized here.
    changed_keys = list(self.VARS_EX['OTHERS'].keys())
    if not (len(changed_keys) == 0
            or changed_keys == ['LR']
            or changed_keys == ['LR', 'EPOCHS']
            or changed_keys == ['EPOCHS']):
        self.CREATE_DATA()
    self.model_parallel()
    self.trainingz()
    self.SAVE_PLOTS()
    print(self.epochz)

def stupidconv_gpu(img, filt, padval):
    """
    Does convolution without using FFT, because FFT is pissing me off and giving me weird answers.

    :param img: 2D input image
    :param filt: 2D filter kernel
    :param padval: value used to pad the image borders
    :return: filtered image, normalized by the number of nonzero filter entries
    """
    cuda.close()
    cuda.select_device(1)

    # get the number of nonzero entries in the filter for later averaging of the result
    filt_nnz = np.count_nonzero(filt)

    # pad the image; appropriate padding depends on context,
    # here we pad with the filter size all around the image
    s_filt = filt.shape
    s_img = img.shape
    pad_img = np.ones((s_img[0] + (2 * s_filt[0]), s_img[1] + (2 * s_filt[1])), dtype=np.float32) * padval
    pad_img[s_filt[0]: s_img[0] + s_filt[0], s_filt[1]: s_img[1] + s_filt[1]] = img

    output = np.zeros(pad_img.shape, dtype=np.float32)

    d_pad_img = cuda.to_device(pad_img)
    d_filt = cuda.to_device(filt)
    d_output = cuda.to_device(output)

    stupidconv_gpu_helper(d_pad_img, d_filt, s_img[0], s_img[1], s_filt[0], s_filt[1], d_output)

    output = d_output.copy_to_host()
    output = output[s_filt[0]:s_filt[0] + s_img[0], s_filt[1]:s_filt[1] + s_img[1]]
    return output / filt_nnz

def detect(self, image):
    cuda.select_device(0)
    # TF session config renamed from the original's `config`, which was later
    # shadowed by the Mask R-CNN InferenceConfig below
    tf_config = ConfigProto()
    tf_config.gpu_options.allow_growth = True
    session = InteractiveSession(config=tf_config)
    ROOT_DIR = "/home/bernihoh/Bachelor/SMS/MaskRCNN/samples/SMSNetworks/face_feature_detection/"
    MODEL_DIR = os.path.join(ROOT_DIR, "logsFaceFeatureDetection")
    COCO_MODEL_PATH = "/home/bernihoh/Bachelor/SMS/MaskRCNN/samples/SMSNetworks/face_feature_detection/mask_rcnn_face_feature_detection_0029.h5"
    config = InferenceConfig()
    config.display()
    # Create model object in inference mode.
    model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)
    # Load weights trained on MS-COCO
    model.load_weights(COCO_MODEL_PATH, by_name=True)
    class_names = ["bg", "iris_l", "inner_eye_l", "outer_eye_l", "eye_brow_l", "cheek_l",
                   "iris_r", "inner_eye_r", "outer_eye_r", "eye_brow_r", "cheek_r",
                   "nose_tip", "nose", "mouth", "chin", "face", "head", "distortion"]
    results = model.detect([image], verbose=1)
    r = results[0]
    session.close()
    cuda.close()
    return r

def get_prediction_real_time(sparkEngine, model=None, url_weight="", dim=15, prediction_weight="",
                             encoder_length=24, decoder_length=24, attention_length=24, is_close_cuda=True):
    # continuously crawl aws and aqi & weather
    end = utils.get_datetime_now()
    end = end - timedelta(hours=1)
    start = end - timedelta(hours=23)
    start = start.replace(minute=0, second=0, microsecond=0)
    # 2. process normalized data vectors
    vectors, w_pred, china_vectors, timestamp = sparkEngine.process_vectors(start, end, dim)
    v_l = len(vectors)
    if v_l:
        sp_vectors = psv.convert_data_to_grid_exe(vectors)
        if v_l < encoder_length:
            sp_vectors = np.pad(sp_vectors, ((encoder_length - v_l, 0), (0, 0), (0, 0), (0, 0)),
                                'constant', constant_values=0)
        if w_pred:
            # repeat for 25 districts
            w_pred = np.repeat(np.expand_dims(w_pred, 1), p.grid_size, 1)
            de_vectors = psv.convert_data_to_grid_exe(w_pred)
            # pad to fill top elements of decoder vectors
            de_vectors = np.pad(de_vectors, ((0, 0), (0, 0), (0, 0), (6, 0)),
                                'constant', constant_values=0)
        else:
            # know nothing about the future weather forecast
            de_vectors = np.zeros((decoder_length, p.grid_size, p.grid_size, dim))
        sp_vectors = np.concatenate((sp_vectors, de_vectors), axis=0)
        c_l = len(china_vectors)
        if c_l < attention_length:
            china_vectors = np.pad(china_vectors, ((attention_length - c_l, 0), (0, 0)),
                                   'constant', constant_values=0)
        # 4. feed to model (alternatives in the original: BaselineModel, MaskGan, APNet)
        if model is None:
            model = APGan(encoder_length=24, decoder_length=24, encode_vector_size=15, batch_size=1,
                          decode_vector_size=9, grid_size=25, forecast_factor=0)
        model.set_data(sp_vectors, [0], None, china_vectors)
        with tf.device('/%s' % p.device):
            model.init_ops(is_train=False)
            saver = tf.train.Saver()
        tconfig = get_gpu_options(False)
        with tf.Session(config=tconfig) as session:
            model.assign_datasets(session)
            preds_pm25 = realtime_execute(model, session, saver, decoder_length, p.prediction_weight_pm25)
            model.forecast_factor = 1
            preds_pm10 = realtime_execute(model, session, saver, decoder_length, p.prediction_weight_pm10)
        china_vectors = np.array(china_vectors)
        if is_close_cuda:
            cuda.select_device(0)
            cuda.close()
        return (preds_pm25, preds_pm10), timestamp, np.transpose(china_vectors[:, :2] * 500)
    else:
        return ([], []), [], []

def cleanup():
    from keras import backend as K
    K.clear_session()
    from numba import cuda
    cuda.select_device(0)
    cuda.close()

def testin():
    N = 2000
    M = 2000
    h = np.asarray(np.float32(2) + np.random.random((N, M)), dtype=np.float32)
    n = np.asarray(np.random.random((N, M)), dtype=np.float32)
    u = np.asarray(np.random.random((N + 1, M)), dtype=np.float32)
    v = np.asarray(np.random.random((N, M + 1)), dtype=np.float32)
    f = np.asarray(np.random.random((N, M)), dtype=np.float32)
    dx = np.float32(0.1)
    dy = np.float32(0.2)
    nu = np.float32(1.0)
    out_u = np.asarray(np.random.random((M, N + 1)), dtype=np.float32)

    threadsperblock = (16, 32)
    # ceil division; the original's (size + tpb) // tpb allocated one extra
    # block whenever the size was an exact multiple of the block size
    blockspergrid_x = (u.shape[0] + threadsperblock[0] - 1) // threadsperblock[0]
    blockspergrid_y = (u.shape[1] + threadsperblock[1] - 1) // threadsperblock[1]
    blockspergrid = (blockspergrid_x, blockspergrid_y)
    print("here we go", u.shape)
    print("blocks per grid", blockspergrid)
    print("threads per block", threadsperblock)

    try:
        for cu_u_driver in (cu_u_driver_global,):
            print(cu_u_driver)
            v1 = cuda.to_device(v)
            out_u1 = cuda.to_device(out_u)
            ts = []
            for i in range(10):
                t = mytime()
                for j in range(100):
                    # ten launches per iteration (written out by hand in the original)
                    for _ in range(10):
                        cu_u_driver[blockspergrid, threadsperblock](v1, out_u1)
                cuda.synchronize()
                t2 = mytime()
                ts.append(t2 - t)  # the original stored t - t2, a negative duration
            print("cuda")
            print(np.median(ts), np.min(ts), np.max(ts), np.std(ts))
            print(ts)
    finally:
        print("cuda closer")
        cuda.close()
    print("all done")

def gpu_dmt(cand, device=0):
    """
    GPU DM-Time bow-tie (by rolling the array)

    Args:
        cand: Candidate instance
        device (int): GPU ID

    Returns:
        candidate object
    """
    cuda.select_device(device)
    chan_freqs = cuda.to_device(np.array(cand.chan_freqs, dtype=np.float32))
    dm_list = cuda.to_device(np.linspace(0, 2 * cand.dm, 256, dtype=np.float32))
    dmt_return = cuda.to_device(np.zeros((256, cand.data.shape[0]), dtype=np.float32))
    cand_data_in = cuda.to_device(np.array(cand.data.T, dtype=cand.data.dtype))

    @cuda.jit
    def gpu_dmt(cand_data_in, chan_freqs, dms, cand_data_out, tsamp):
        ii, jj, kk = cuda.grid(3)
        if (
            ii < cand_data_in.shape[0]
            and jj < cand_data_in.shape[1]
            and kk < dms.shape[0]
        ):
            disp_time = int(
                -1
                * 4148808.0
                * dms[kk]
                * (1 / (chan_freqs[0]) ** 2 - 1 / (chan_freqs[ii]) ** 2)
                / 1000
                / tsamp
            )
            cuda.atomic.add(
                cand_data_out,
                (kk, jj),
                cand_data_in[ii, (jj + disp_time) % cand_data_in.shape[1]],
            )

    threadsperblock = (16, 8, 8)
    blockspergrid_x = math.ceil(cand_data_in.shape[0] / threadsperblock[0])
    blockspergrid_y = math.ceil(cand_data_in.shape[1] / threadsperblock[1])
    blockspergrid_z = math.ceil(dm_list.shape[0] / threadsperblock[2])
    blockspergrid = (blockspergrid_x, blockspergrid_y, blockspergrid_z)
    gpu_dmt[blockspergrid, threadsperblock](
        cand_data_in, chan_freqs, dm_list, dmt_return, float(cand.your_header.tsamp)
    )
    cand.dmt = dmt_return.copy_to_host()
    cuda.close()
    return cand

def newthread():
    cuda.select_device(0)
    stream = cuda.stream()
    A = np.arange(100)
    dA = cuda.to_device(A, stream=stream)
    stream.synchronize()
    del dA
    del stream
    cuda.close()

def newthread():
    devices = range(driver.get_device_count())
    print('Devices', devices)
    for _ in range(2):
        for d in devices:
            cuda.select_device(d)
            print('Selected device', d)
            cuda.close()
            print('Closed device', d)

def newthread(exception_queue):
    try:
        devices = range(driver.get_device_count())
        for _ in range(2):
            for d in devices:
                cuda.select_device(d)
                cuda.close()
    except Exception as e:
        exception_queue.put(e)

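# A plausible driver for newthread above (inferred from its signature, in the
# style of numba's own tests): the body must run on its own thread, because
# cuda.select_device()/cuda.close() only manage the calling thread's context.
import threading
import queue

exception_queue = queue.Queue()
t = threading.Thread(target=newthread, args=(exception_queue,))
t.start()
t.join()
assert exception_queue.empty(), exception_queue.get()
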
def clear(self):
    '''
    Used to clear the current session from the GPU.
    '''
    K.clear_session()
    gc.collect()
    del self.model
    for gpu in range(len(cuda.gpus)):
        cuda.select_device(gpu)
        cuda.close()

def mpi_fq(n_nodes, m_list, q, scatter_array, qbin):
    """
    Breakup the job across the GPU enabled nodes

    Parameters
    ----------
    n_nodes: int
        Number of allocated nodes, not including the head node

    Returns
    -------
    list of floats:
        Amount of memory per GPU
    """
    from mpi4py import MPI
    kernel_loc = inspect.getfile(mpi_fq_worker)
    comm = MPI.COMM_WORLD.Spawn(sys.executable, args=[kernel_loc], maxprocs=n_nodes)
    n_cov = 0
    status = MPI.Status()
    m_list += ([StopIteration] * n_nodes)
    p = None
    thread_q = []
    for m in m_list:
        if m is StopIteration:
            msg = m
        else:
            msg = (q, scatter_array, qbin, m, n_cov)
        # If the thread on the main node is done, or not started:
        # give a problem to it
        if p is None or p.is_alive() is False:
            cuda.close()
            p = Thread(target=subs_fq,
                       args=(cuda.gpus.lst[0], q, scatter_array, thread_q, qbin, m, n_cov))
            p.start()
        else:
            comm.recv(source=MPI.ANY_SOURCE, status=status)
            comm.send(obj=msg, dest=status.Get_source())
        if type(m) == int:
            n_cov += m
    p.join()
    # Make certain we have covered all the atoms
    assert n_cov == len(q)
    # TODO: Make Numpy based Gather for faster memory transfer or Sum Reduce
    reports = comm.gather(root=MPI.ROOT)
    comm.Disconnect()
    reports += thread_q
    return reports

def test_api_post():
    # note the doubled backslash before demo_image; the original's lone `\d` was
    # an invalid escape that Python only tolerates with a warning
    path = "C:\\Users\\CAU\\Desktop\\capstone\\text_recognition\\demo_image"
    if os.path.exists(path):
        for file in os.scandir(path):
            os.remove(file.path)
    imagefile = request.files['image']
    filename = werkzeug.utils.secure_filename(imagefile.filename)
    print("\nReceived image File name : " + imagefile.filename)
    imagefile.save("./text_detection/test/" + filename)

    detection.run_detection()
    img_files, img_bbox = load_files()
    crop_img(img_files, img_bbox)
    pred_str = recognition.run_recognition()

    # underline detection
    cfg = PredictionConfig()
    # define the model
    model = MaskRCNN(mode='inference', model_dir='./', config=cfg)
    # load model weights
    model_path = 'mask_rcnn_underline_cfg_0020.h5'
    model.load_weights(model_path, by_name=True)
    temp = cv2.imread("./text_detection/test/androidFlask.jpg")
    yhat = model.detect([temp], verbose=0)[0]
    print(len(yhat['rois']))

    # box corners are ordered [l, t], [r, t], [r, b], [l, b]
    for i, file in enumerate(img_files):
        txt = pd.read_csv(img_bbox[i], header=None)
        df = pd.DataFrame(columns=["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4", "result_text"])
        # compare each text box against the detected underlines
        # (inner index renamed to `j`; the original reused and shadowed `i`)
        for j, bb in enumerate(txt.values):
            x1, y1, x2, y2, x3, y3, x4, y4 = bb
            for underline in yhat['rois']:
                uy1, ux1, uy2, ux2 = underline
                if x1 < (ux1 + ux2) / 2 < x3 and y1 < uy1 < y3:
                    df = df.append({"x1": x1, "y1": y1, "x2": x2, "y2": y2,
                                    "x3": x3, "y3": y3, "x4": x4, "y4": y4,
                                    "result_text": pred_str[j]}, ignore_index=True)
                    # draw the box: top-left and bottom-right corners of the rectangle
                    temp = cv2.rectangle(temp, (x1, y1), (x3, y3), (0, 0, 255), 3)
    df.to_csv("./result.csv")
    cv2.imwrite("./result.jpg", temp)

    from keras import backend as K
    K.clear_session()
    cuda.select_device(0)
    cuda.close()
    del model
    return "done"

def main(argv):
    generate_trajectory()
    learn_direction()
    if FLAGS.evaluation in ['qualitative', 'quantitative']:
        latent_traversal()
    if FLAGS.evaluation == 'quantitative':
        cuda.select_device(0)
        cuda.close()
        get_barycenter()
        measure_perf()

def newthread(exception_queue):
    try:
        cuda.select_device(0)
        stream = cuda.stream()
        A = np.arange(100)
        dA = cuda.to_device(A, stream=stream)
        stream.synchronize()
        del dA
        del stream
        cuda.close()
    except Exception as e:
        exception_queue.put(e)

def GPU_switch(GPU):
    if not GPU:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        tf.keras.backend.clear_session()
    else:
        # watch nvidia-smi
        cuda.select_device(0)
        cuda.close()
        print('CUDA memory released: GPU0')
        gpus = tf.config.experimental.list_physical_devices('GPU')
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

def newthread(exception_queue):
    try:
        devices = range(driver.get_device_count())
        print('Devices', devices)
        for _ in range(2):
            for d in devices:
                cuda.select_device(d)
                print('Selected device', d)
                cuda.close()
                print('Closed device', d)
    except Exception as e:
        exception_queue.put(e)

def gpu_dedisperse(cand, device=0):
    """
    GPU dedispersion (by rolling the array)

    Args:
        cand: Candidate instance
        device (int): GPU ID

    Returns:
        candidate object
    """
    cuda.select_device(device)
    chan_freqs = cuda.to_device(np.array(cand.chan_freqs, dtype=np.float32))
    cand_data_in = cuda.to_device(np.array(cand.data.T))
    cand_data_out = cuda.to_device(np.zeros_like(cand.data.T))

    @cuda.jit
    def gpu_dedisp(cand_data_in, chan_freqs, dm, cand_data_out, tsamp):
        ii, jj = cuda.grid(2)
        if ii < cand_data_in.shape[0] and jj < cand_data_in.shape[1]:
            disp_time = int(
                -4148808.0
                * dm
                * (1 / (chan_freqs[0]) ** 2 - 1 / (chan_freqs[ii]) ** 2)
                / 1000
                / tsamp
            )
            cand_data_out[ii, jj] = cand_data_in[
                ii, (jj + disp_time) % cand_data_in.shape[1]
            ]

    threadsperblock = (32, 32)
    blockspergrid_x = math.ceil(cand_data_in.shape[0] / threadsperblock[0])
    blockspergrid_y = math.ceil(cand_data_in.shape[1] / threadsperblock[1])
    blockspergrid = (blockspergrid_x, blockspergrid_y)
    gpu_dedisp[blockspergrid, threadsperblock](
        cand_data_in,
        chan_freqs,
        float(cand.dm),
        cand_data_out,
        float(cand.your_header.tsamp),
    )
    cand.dedispersed = cand_data_out.copy_to_host().T
    cuda.close()
    return cand

def reset_keras(device=0):
    cuda.select_device(device)
    cuda.close()
    print(gc.collect())  # if it's done something you should see a number being printed
    K.clear_session()
    sess = K.get_session()
    sess.close()
    # use the same config as you used to create the session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1
    config.gpu_options.visible_device_list = "0"
    K.set_session(tf.Session(config=config))

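# Assumed usage pattern for reset_keras above, between training runs in one
# process. The ordering matters: cuda.close() invalidates the context the old
# Keras session lived in, which is why a brand-new tf.Session is installed
# immediately afterwards. build_model is a hypothetical helper.
# model = build_model(); model.fit(x, y); del model
# reset_keras()
# model = build_model()  # starts on a fresh CUDA context
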
def Clear():
    from numba import cuda
    device = cuda.get_current_device()
    device.reset()
    s = cuda.current_context().get_memory_info()
    print(s)
    cuda.current_context().deallocations.clear()
    s = cuda.current_context().get_memory_info()
    print(s)
    cuda.select_device(0)
    # do tf stuff
    cuda.close()

def clear_context(self) -> None:
    try:
        print("Clearing Context")
        devices_list: List[cuda.cudadrv.devices._DeviceContextManager] = cuda.list_devices().lst
        for device in devices_list:
            print("GPU device id: {}".format(device.id))
            cuda.select_device(device.id)
            cuda.close()
            device.reset()
    except cuda.cudadrv.error.CudaSupportError:
        pass
    finally:
        print("Context Cleared")

__author__ = 'christopher'

if __name__ == '__main__':
    from mpi4py import MPI
    from numba import cuda

    comm = MPI.Comm.Get_parent()
    rank = comm.Get_rank()

    meminfo = int(cuda.current_context().get_memory_info()[0])
    cuda.close()
    comm.gather(sendobj=meminfo, root=0)
    comm.Disconnect()

def setUp(self):
    # Reset before testing
    cuda.close()

def tearDown(self):
    cuda.close()

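# Every snippet in this collection revolves around the same numba lifecycle.
# A minimal, self-contained sketch of it (assumes a CUDA-capable device 0):
from numba import cuda
import numpy as np

cuda.select_device(0)                  # bind this thread to GPU 0
d_arr = cuda.to_device(np.arange(10))  # allocate within that context
print(d_arr.copy_to_host())
cuda.close()                           # destroy the context, freeing its memory
cuda.select_device(0)                  # a fresh context can be created afterwards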