Example #1
    def image_summary(
        self,
        samples: Optional[List["IMCSample"]] = None,
        rois: Optional[List["ROI"]] = None,
    ):
        raise NotImplementedError
        from imc.utils import lacunarity, fractal_dimension

        rois = self._get_rois(samples, rois)
        roi_names = [r.name for r in rois]
        densities = pd.Series(
            {roi.name: roi.cells_per_area_unit()
             for roi in rois},
            name="cell density",
        )
        lacunarities = pd.Series(
            parmap.map(lacunarity, [roi.cell_mask_o for roi in rois],
                       pm_pbar=True),
            index=roi_names,
            name="lacunarity",
        )
        fractal_dimensions = pd.Series(
            parmap.map(
                fractal_dimension,
                [roi.cell_mask_o for roi in rois],
                pm_pbar=True,
            ),
            index=roi_names,
            name="fractal_dimension",
        )

        morphos = pd.DataFrame(
            [densities * 1e4, lacunarities, fractal_dimensions]).T
Example #2
    def mapper(self):
        # Build one critics-index URL per letter of 'alpha', then crawl them all in parallel.
        baseurl = 'https://www.rottentomatoes.com/critics/authors?letter='
        for i in range(len(alpha)):
            realurl = baseurl + quote_plus(alpha[i])
            url.append(realurl)
        parmap.map(self.crawler, url, pm_pbar=True, pm_processes=num_cores)
Example #3
 def test_map_progress(self):
     items = range(4)
     pfalse = parmap.map(_wait, items, pm_pbar=False)
     ptrue = parmap.map(_wait, items, pm_pbar=True)
     noparmap = list(map(_wait, items))
     self.assertEqual(pfalse, ptrue)
     self.assertEqual(pfalse, noparmap)
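The progress-bar and timing tests in this list rely on a module-level `_wait` helper and a `TIME_PER_TEST` constant that are not shown. A minimal sketch of what they might look like, inferred only from how the tests use them (the delay value is an assumption):

import time

TIME_PER_TEST = 0.1  # assumed per-task delay; the timing tests only require it to be consistent

def _wait(x):
    # Sleep for a fixed interval so parallel and serial runs have measurably different
    # wall-clock times, then return the input unchanged so results can be compared
    # across pm_pbar / pm_parallel settings.
    time.sleep(TIME_PER_TEST)
    return x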
Example #4
 def test_map_kwargs(self):
     items = range(2)
     pfalse = parmap.map(_fun_with_keywords, items, pm_parallel=False, a=10)
     ptrue = parmap.map(_fun_with_keywords, items, pm_parallel=True, a=10)
     noparmap = [ x + 10 + _DEFAULT_B for x in items]
     self.assertEqual(pfalse, ptrue)
     self.assertEqual(pfalse, noparmap)
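Similarly, `_fun_with_keywords` and `_DEFAULT_B` are assumed by this test but not shown; a plausible sketch consistent with the expected result `[x + 10 + _DEFAULT_B for x in items]` (the constant's value is an assumption):

_DEFAULT_B = 17  # assumed value; the test only requires the same constant on both sides

def _fun_with_keywords(x, a=0, b=_DEFAULT_B):
    # Keyword arguments passed to parmap.map (here a=10) are forwarded to every call.
    return x + a + b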
Example #6
 def test_map_kwargs(self):
     items = range(2)
     pfalse = parmap.map(_fun_with_keywords, items, pm_parallel=False, a=10)
     ptrue = parmap.map(_fun_with_keywords, items, pm_parallel=True, a=10)
     noparmap = [x + 10 + _DEFAULT_B for x in items]
     self.assertEqual(pfalse, ptrue)
     self.assertEqual(pfalse, noparmap)
Example #7
 def test_map(self):
     items = range(4)
     pfalse = parmap.map(_identity, items, parallel=False)
     ptrue = parmap.map(_identity, items, parallel=True)
     noparmap = list(map(_identity, items))
     self.assertEqual(pfalse, ptrue)
     self.assertEqual(pfalse, noparmap)
Example #8
def get_labeled_patches(imgs, gts, n_segments=100, thres1=0.2, thres2=0.2):
    """
    Get all the patches from the set of images.
    :param imgs: images
    :param gts: masks
    :param n_segments: max number of patches for image
    :param thres1: label = 1 if a proportion bigger than thres1 in the patch is masked as 1
    :param thres2: label = 1 if pixels masked as 1 in patch / total number of pixels masked as 1 in the picture > thres2
    :return: patches: list of patches, size [len(img), n_patches_per_image, 80,80]
    :return: labels: list of labels per each patch, size [len(img), n_patches_per_image]
    """
    n = len(imgs)
    SLIC_list = np.asarray([
        slic(imgs[i, :], n_segments, compactness=20, sigma=10)
        for i in range(len(imgs))
    ])

    # run box function to find all superpixel patches sizes
    boxes = parmap.map(box, SLIC_list)

    # populating x_train
    patches = parmap.map(xpatchify, zip(imgs, SLIC_list, boxes))

    # labels
    labels = parmap.map(patch_cat, zip(gts, SLIC_list), thres1, thres2)

    return patches, labels
Example #9
def build_HDF5(size):
    """
    Gather the data in a single HDF5 file.
    """

    df_attr = parse_attibutes()
    list_col_labels = [
        c for c in df_attr.columns.values
        if c not in ["person", "imagenum", "image_path"]
    ]

    # Put train data in HDF5
    hdf5_file = os.path.join(data_dir, "lfw_%s_data.h5" % size)
    with h5py.File(hdf5_file, "w") as hfw:

        data_color = hfw.create_dataset("lfw_%s_color" % size,
                                        (0, 3, size, size),
                                        maxshape=(None, 3, size, size),
                                        dtype=np.uint8)

        data_sketch = hfw.create_dataset("lfw_%s_sketch" % size,
                                         (0, 1, size, size),
                                         maxshape=(None, 1, size, size),
                                         dtype=np.uint8)

        label = hfw.create_dataset("labels",
                                   data=df_attr[list_col_labels].values)
        label.attrs["label_names"] = list_col_labels

        arr_img = df_attr.image_path.values

        num_files = len(arr_img)
        chunk_size = 1000
        num_chunks = num_files // chunk_size
        arr_chunks = np.array_split(np.arange(num_files), num_chunks)

        for chunk_idx in tqdm(arr_chunks):

            list_img_path = arr_img[chunk_idx].tolist()

            output = parmap.map(format_image, list_img_path, size)
            output_sketch = parmap.map(format_sketch, list_img_path, size)

            arr_img_color = np.concatenate(output, axis=0)

            arr_img_sketch = np.concatenate(output_sketch, axis=0)

            # Resize HDF5 dataset
            data_color.resize(data_color.shape[0] + arr_img_color.shape[0],
                              axis=0)

            data_color[-arr_img_color.shape[0]:] = arr_img_color.astype(
                np.uint8)

            data_sketch.resize(data_sketch.shape[0] + arr_img_sketch.shape[0],
                               axis=0)

            data_sketch[-arr_img_sketch.shape[0]:] = arr_img_sketch.astype(
                np.uint8)
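The chunked-append pattern above (create a zero-length, resizable dataset, then grow it and write into the new tail for each parmap-processed chunk) recurs in the later build_HDF5 variants as well. A minimal standalone sketch of just that HDF5 pattern, with toy shapes and a throwaway file name:

import h5py
import numpy as np

with h5py.File("toy.h5", "w") as hf:
    dset = hf.create_dataset("data", (0, 3), maxshape=(None, 3), dtype=np.uint8)
    for _ in range(3):
        batch = np.random.randint(0, 255, size=(5, 3), dtype=np.uint8)
        dset.resize(dset.shape[0] + batch.shape[0], axis=0)  # grow along axis 0
        dset[-batch.shape[0]:] = batch                       # fill the newly added rows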
Example #10
    def mix_rir_and_sound_source(self, mode):
        """
        convolve speech and speech_rir (random selected)
        :param mode: tr/cv/tt
        :return: save multi-channel speech
        """
        # path set
        save_path = os.getcwd() + '/multi_channel_speech/' + mode
        rir_path = os.getcwd() + '/rir/' + mode
        if mode == 'cv':
            rir_path = os.getcwd() + '/rir/tr'
        spc_path = '/home/dail/PycharmProjects/DCCRN/data/tr/clean'

        # rir list and sound source list
        rir_list = glob(rir_path + '/*/*.npz')
        spc_list = glob(spc_path + '/*.wav')

        # generate random rir index
        spc_list.sort()
        _use_par = False

        if _use_par == True:
            if mode == 'tr':
                _ = parmap.map(self.convolve_and_save_rir_tr,
                               spc_list,
                               pm_pbar=True,
                               pm_processes=28)
            if mode == 'cv':
                _ = parmap.map(self.convolve_and_save_rir_cv,
                               spc_list,
                               pm_pbar=True,
                               pm_processes=28)
            if mode == 'tt':
                _ = parmap.map(self.convolve_and_save_rir_tt,
                               spc_list,
                               pm_pbar=True,
                               pm_processes=28)

        else:
            for i, _spc in enumerate(tqdm(spc_list)):

                # read audio file
                # aud, fs = librosa.core.load(_spc, sr=None, mono=False)
                aud, fs = audioread(_spc)

                if len(aud.shape) != 1:
                    aud = aud[:, 0]

                #aud.shape[1]
                idx_s = np.random.randint(0, len(rir_list))
                npz = np.load(rir_list[idx_s], allow_pickle=True)

                # convolve
                rir = npz['rir']
                Y = ss.convolve(rir, aud[:, np.newaxis])
                audiowrite(
                    save_path + '/' + rir_list[idx_s].split('/')[-2] + '_' +
                    rir_list[idx_s].split('/')[-1].split('.n')[0] + '_' +
                    _spc.split('/')[-1], Y, fs)
Example #11
def handle_batch(batch, instances):
	print 'Downloading batch '+batch+'... ',
	dir = 'downloaded_'+batch
	dnss = map(lambda (x,y): (dir, x, y), instances)
	os.makedirs(dir)
	parmap.map(handle_instance, dnss)
	parmap.map(extract_instance, dnss)
	print 'Done'
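The snippet above is Python 2 only (print statements and a tuple-unpacking lambda). A rough Python 3 equivalent, assuming the same handle_instance and extract_instance helpers from that project:

import os
import parmap

def handle_batch(batch, instances):
    print('Downloading batch ' + batch + '... ', end='')
    batch_dir = 'downloaded_' + batch
    # Pair each (x, y) instance with the download directory, as the helpers expect.
    dnss = [(batch_dir, x, y) for (x, y) in instances]
    os.makedirs(batch_dir, exist_ok=True)
    parmap.map(handle_instance, dnss)   # handle_instance: assumed helper from the original project
    parmap.map(extract_instance, dnss)  # extract_instance: assumed helper from the original project
    print('Done')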
Example #12
    def store_apps_info(self, app_ids: [str]) -> None:
        """Adds the specified apps to the data set by retrieving all the info
        needed and appending them to the list of apps (kept in _info_file).

        :param app_ids: array of app ids.
        """
        app_ids = set(app_ids)
        parmap.map(self.store_app_info, app_ids)
Example #13
def iedbPredict(method, hlas, peptides, cpus=1, verbose=False):
    """Generate HLA:peptide binding affinity (log-IC50) predictions using
    the tools distributed by IEDB.

    Predictions are computed for all HLA:peptide combinations.

    Parameters
    ----------
    method : string
        Prediction method (e.g. netmhcpan, smm, ann)
        If RAND is specified then random predictions are returned.
    hlas : list
        List of HLA alleles in the format A_0201 or A*0201
    peptides : list of strings
        List of peptides, required to all be of the same length.
    cpus : int
        Number of cores to use in parallelizing the predictions.

    Returns
    -------
    df : pd.DataFrame
        Columns: method, hla, peptide, core, pred"""

    if verbose:
        """Create console handler and set level to debug"""
        logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(asctime)s:%(message)s')
        logging.info('HLA prediction initialized for %d HLA allele(s) using method %s on %d CPU(s)', len(hlas), method, cpus)


    cols = ['method', 'hla', 'peptide', 'core', 'pred']
    
    if method == 'RAND':
        results = dict(method=[], hla=[], peptide=[], core=[], pred=[])
        for h, pep in itertools.product(hlas, peptides):
            results['method'].append('RAND')
            results['hla'].append(h)
            results['peptide'].append(pep)
            results['core'].append(pep)
            results['pred'].append(np.random.rand())
        resDf = pd.DataFrame(results, columns=cols)
    else:
        if cpus > 1:
            result = parmap.map(_predictOneHLA, hlas, method, peptides, verbose, pool=Pool(processes=cpus))
        else:
            result = parmap.map(_predictOneHLA, hlas, method, peptides, verbose, parallel=False)

        """Remove None's"""
        resDf = pd.concat([r for r in result if not r is None], axis=0)

        """Take the log of the prediction if neccessary."""
        if resDf.pred.max() > 100:
            resDf['pred'] = np.log(resDf.pred)

        if verbose:
            logging.info('Completed %d predictions (expected %d)', resDf.shape[0], len(hlas) * len(peptides))
    return resDf
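For reference, a hypothetical call (the alleles and 9-mer peptides below are placeholders, not from the original project); with cpus > 1 the per-allele predictions run through parmap.map on an explicit multiprocessing Pool:

df = iedbPredict('netmhcpan',
                 hlas=['A_0201', 'B_0702'],
                 peptides=['SLYNTVATL', 'GILGFVFTL'],  # must all share one length
                 cpus=4,
                 verbose=True)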
Example #14
def delete_operation(project_id, mode):
    table_names = return_tables(project_id, schema_name)
    if mode == 'prefix':
        deletion_table_names = list(filter(lambda k: pattern in k,
                                           table_names))
        print(deletion_table_names)
        results = parmap.map(delete_table, deletion_table_names, project_id)
        #results = p.map(delete_table, deletion_table_names)
        print('tables_deleted')
    elif mode == 'array':
        results = parmap.map(delete_table, reqd_table_names, project_id)
Example #15
def test_pgen_with_parmap():
    """
    Really simple example of using multiple cpus to 
    speed up computation of pgens with olga.
    """
    import parmap
    from tcrdist.pgen import OlgaModel
    olga_beta = OlgaModel(chain_folder="human_T_beta", recomb_type="VDJ")
    parmap.map(olga_beta.compute_aa_cdr3_pgen, [
        'CASSYRVGTDTQYF', 'CATSTNRGGTPADTQYF', 'CASQGDSFNSPLHF',
        'CASSPWTGSMALHF'
    ])
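The call above relies on parmap defaults; to pin the number of worker processes and get a progress bar, the pm_ options used elsewhere in these examples apply here too (the process count below is arbitrary):

pgens = parmap.map(olga_beta.compute_aa_cdr3_pgen,
                   ['CASSYRVGTDTQYF', 'CATSTNRGGTPADTQYF',
                    'CASQGDSFNSPLHF', 'CASSPWTGSMALHF'],
                   pm_processes=2, pm_pbar=True)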
Example #16
def load_chip_singleTask(input_dirs, tf):
    blacklist = make_blacklist()

    print('Loading and sorting BED file(s)')
    chip_bed_list, relaxed_bed_list = zip(*parmap.map(get_chip_bed, input_dirs, tf, blacklist.fn))

    # Later we want to gather negative windows from the genome that do not overlap
    # with a blacklisted or ChIP region
    print('Generating regions to exclude for negative windows')
    nonnegative_regions_bed_file_list = parmap.map(nonnegative_wrapper, relaxed_bed_list, blacklist.fn)
    nonnegative_regions_bed_list = [BedTool(i) for i in nonnegative_regions_bed_file_list]
    return chip_bed_list, nonnegative_regions_bed_list
Example #17
def insta_image_crawler_main(tagAndUrls, imgDir, iteration):
    # 'tagAndUrls' holds 1:1 pairs of each #Tag to search and its Url; 'imgDir' is the directory for downloaded images.
    # 'iteration' is the number of times to repeat scrolling down.
    imgList = []  # Collects the image Urls fetched by scrolling down for the given #Tags.

    while True:
        # For multiprocessing, the #Tags (their Urls) are processed in batches the size of the machine's CPU core count.
        if len(tagAndUrls) <= cores:
            # No more #Tags than cores: split the work into only as many parts as there are #Tags.
            indexedtagAndUrls = list(
                enumerate(np.array_split(tagAndUrls, len(tagAndUrls))))
            sleepTimeBias = len(tagAndUrls) * 0.5
            # Scroll-downs run in parallel, so add an extra delay to the base wait for images to load after each scroll.
            for imgListPerCore in parmap.map(image_crawler, indexedtagAndUrls,
                                             iteration, sleepTimeBias):
                # 'parmap.map' runs one process per split task.
                for img in imgListPerCore:
                    imgList.append(img)  # Gather the image Urls returned from each split task.
            break  # Having no more #Tags than cores means nothing is left afterwards, so stop once this batch is done.
        else:  # More #Tags remain than CPU cores: process one core-sized batch and keep looping.
            indexedtagAndUrls = list(
                enumerate(np.array_split(tagAndUrls[0:cores], cores)))
            del tagAndUrls[0:cores]
            # Take as many #Tags as there are CPU cores, then drop that slice from the full 'tagAndUrls' list.
            sleepTimeBias = cores * 0.5
            # Scroll-downs run in parallel, so add an extra delay to the base wait for images to load after each scroll.
            for imgListPerCore in parmap.map(image_crawler, indexedtagAndUrls,
                                             iteration, sleepTimeBias):
                # 'parmap.map' runs one process per split task.
                for img in imgListPerCore:
                    imgList.append(img)  # Gather the image Urls returned from each split task.

    indexedImgList = list(enumerate(np.array_split(list(set(imgList)), cores)))
    # Deduplicate the collected Urls by Url value alone (not by comparing image contents),
    # then split the whole set by the machine's core count for multiprocessing.

    fileNames = []  # Names of the saved files.
    cnt = 0  # Counter for the saved files.

    for fileNamesPerCore, cntPerCore in parmap.map(image_saver, indexedImgList,
                                                   imgDir):
        # Pass each split Url list, together with the target directory 'imgDir',
        # to the file-saving method via 'parmap.map', one process per CPU core.
        # Each process returns its saved file names and count, handled below.
        for fileName in fileNamesPerCore:
            fileNames.append(fileName)  # Add the file names returned by each parallel process.
        cnt += cntPerCore  # Add each process's saved-file count to the total.

    return fileNames, cnt  # Return the combined file name list and total count from all processes.
Example #18
    def pairwise_filter_conv_parallel(self):

        # Cat: TODO: this may still crash memory in some cases; can split into additional bits
        units = np.array_split(np.unique(self.up_up_map), self.n_processors)
        if self.multi_processing:
            parmap.map(parallel_conv_filter,
                       list(zip(np.arange(len(units)), units)),
                       self.n_time,
                       self.up_up_map,
                       self.unit_overlap,
                       self.up_factor,
                       self.vis_chan,
                       self.approx_rank,
                       self.deconv_dir,
                       processes=self.n_processors,
                       pm_pbar=True)
        else:
            units = np.unique(self.up_up_map)

            for k in range(len(units)):
                print("unit : ", k)
                parallel_conv_filter([k, [units[k]]], self.n_time,
                                     self.up_up_map, self.unit_overlap,
                                     self.up_factor, self.vis_chan,
                                     self.approx_rank, self.deconv_dir)

        # load temp_temp saved files from disk due to memory overload otherwise
        temp_array = []
        for i in range(len(units)):
            fname = self.deconv_dir + '/temp_temp_chunk_' + str(i) + '.npy'
            temp_pairwise_conv = np.load(fname)
            temp_array.extend(temp_pairwise_conv)
            os.remove(fname)

        # initialize empty list and fill only correct locations
        print("  gathering temp_temp results...")
        pairwise_conv = []
        for i in range(self.n_unit):
            pairwise_conv.append(None)

        ctr = 0
        for unit2 in np.unique(self.up_up_map):
            pairwise_conv[unit2] = temp_array[ctr]
            ctr += 1

        pairwise_conv = np.array(pairwise_conv)
        print(pairwise_conv.shape)

        # save to disk, don't keep in memory
        np.save(self.deconv_dir + "/pairwise_conv.npy", pairwise_conv)
Example #19
def build_HDF5(jpeg_dir):
    """
    Gather the data in a single HDF5 file.
    """

    # Put train data in HDF5
    hdf5_file = os.path.join(output_dir, NAME)
    with h5py.File(hdf5_file, "w") as hfw:

            list_img = glob.glob(os.path.join(jpeg_dir, "*.jpg"))
            list_img = np.array(list_img)

            data_color = hfw.create_dataset("data",
                                            (0, 3, SIZE, SIZE),
                                            maxshape=(None, 3, SIZE, SIZE),
                                            dtype=np.uint8)

            num_files = len(list_img)
            chunk_size = TO BE SPECIFIED #set to num_files if dataset is small
            num_chunks = num_files // chunk_size
            arr_chunks = np.array_split(np.arange(num_files), num_chunks)

            for chunk_idx in tqdm(arr_chunks):

                list_img_path = list_img[chunk_idx].tolist()
                output = parmap.map(format_image, list_img_path, SIZE, parallel=True)

                arr_img_color = np.concatenate(output, axis=0)

                # Resize HDF5 dataset
                data_color.resize(data_color.shape[0] + arr_img_color.shape[0], axis=0)

                data_color[-arr_img_color.shape[0]:] = arr_img_color.astype(np.uint8)
Example #20
def stock_price_ambiguity(max_workers):
    cur = connect(db_path).cursor()
    cur.execute('SELECT name FROM sqlite_master WHERE type="table"')
    stock_list = [row[0] for row in cur]
    cur.close()

    result = File_manager('analyzed', 'ambiguity')
    t = int(result.ver['ambiguity']) * 1000000

    try:
        result_df = pd.read_csv(result.path, index_col=0)
    except EmptyDataError:
        result_df = None

    with Pool(processes=int(max_workers), initializer=init) as p:
        stock_ambiguity = parmap.map(worker, stock_list, t, pm_pool=p, pm_pbar=True)
    stock_ambiguity = pd.concat(stock_ambiguity, axis=1)

    if result_df is not None:
        stock_ambiguity = pd.concat([result_df.iloc[:-1], stock_ambiguity])

    stock_ambiguity.sort_index(axis=0, inplace=True)

    result.update_version({'ambiguity': stock_ambiguity.index[-1]})
    stock_ambiguity.to_csv(result.path)
Example #21
def calculate_score(model,
                    model_weights_path,
                    musdb_dir='musdb',
                    n_workers=1,
                    n_fft=2048,
                    hop_length=512,
                    slice_duration=2):
    mus = musdb.DB(root_dir=musdb_dir)
    music_list = mus.load_mus_tracks(subsets='test')

    model_weights = torch.load(model_weights_path)
    model.load_state_dict(model_weights)
    # model.cuda()
    scores = parmap.map(calculate_SDR,
                        music_list,
                        pm_processes=n_workers,
                        pm_pbar=True,
                        model=model,
                        n_fft=n_fft,
                        hop_length=hop_length,
                        slice_duration=slice_duration)

    print(scores)
    print(np.mean(scores))
    print(np.median(scores))

    torch.save(scores, 'scores')
Example #22
def main():
    data = np.loadtxt("./exampleTargets/C3/K2C3cat.txt", usecols=(0,))

    cfg = pl.loadDefaultConfig()

    taskList = cfg["taskList"]
    #    for i in range(len(taskList)):
    #        taskList[i] = "pl.%s" %(taskList[i])
    #    cfg['taskList'] = taskList

    cfg["taskList"] = taskList[:10]
    print cfg["taskList"]

    count = multiprocessing.cpu_count() - 1
    p = pool.Pool(count)
    print count

    cfg["debug"] = False
    parallel = cfg.get("debug", False)
    parallel = False

    # Pool doesn't release threads even when it runs to completion.
    # Problem not related to exceptions being raised
    with contextlib.closing(pool.Pool(count)) as p:
        out = parmap.map(pl.runOne, data[1:3], cfg, parallel=parallel)
    p.join()
    p.close()

    return out
Example #23
def disaster_message_preprocessor(max_workers):
    mode = 'w'
    input = File_manager('raw', 'disasterMessage')
    output = File_manager('preprocessed', 'disasterMessage')
    new_ver = input.ver.copy()

    if new_ver['disasterMessage'] == '0':
        return

    raw = read_csv(input.path)
    t = output.ver['disasterMessage']
    new_ver.update(File_manager('ref', 'userdic', format='txt').ver)
    new_ver.update(File_manager('ref', 'stopwords').ver)
    compare = output.compare_version(new_ver)
    header = True
    n = len(compare)

    if n:
        output.update_version(new_ver)
        if n == 1 and compare[0] == 'disasterMessage' and t != '0':
            mode = 'a'
            header = False
            raw = raw.iloc[t:]
    else:
        return

    df_split = array_split(raw, max_workers)
    df_list = parmap.map(tsk,
                         df_split,
                         pm_pbar=True,
                         pm_pool=Pool(max_workers, initializer=initializer))
    concat(df_list).to_csv(output.path, mode=mode, index=False, header=header)
Example #24
def load_chip_multiTask(input_dir):
    tfs, chip_beds, merged_chip_bed = get_chip_beds(input_dir)
    print('Removing peaks outside of X chromosome and autosomes')
    chroms, chroms_sizes, genome_bed = get_genome_bed()
    merged_chip_bed = merged_chip_bed.intersect(genome_bed, u=True, sorted=True)

    print('Windowing genome')
    genome_windows = BedTool().window_maker(g=genome_sizes_file, w=genome_window_size,
                                            s=genome_window_step)

    print('Extracting windows that overlap at least one ChIP interval')
    positive_windows = genome_windows.intersect(merged_chip_bed, u=True, f=1.0*(genome_window_size/2+1)/genome_window_size, sorted=True)

    # Exclude all windows that overlap a blacklisted region
    blacklist = make_blacklist()
    
    print('Removing windows that overlap a blacklisted region')
    positive_windows = positive_windows.intersect(blacklist, wa=True, v=True, sorted=True)

    num_positive_windows = positive_windows.count()
    # Binary binding target matrix of all positive windows
    print('Number of positive windows:', num_positive_windows)
    print('Number of targets:', len(tfs))
    # Generate targets
    print('Generating target matrix of all positive windows')
    y_positive = parmap.map(intersect_count, chip_beds, positive_windows.fn)
    y_positive = np.array(y_positive, dtype=bool).T
    print('Positive matrix sparsity', (~y_positive).sum()*1.0/np.prod(y_positive.shape))
    merged_chip_slop_bed = merged_chip_bed.slop(g=genome_sizes_file, b=genome_window_size)
    # Later we want to gather negative windows from the genome that do not overlap
    # with a blacklisted or ChIP region
    nonnegative_regions_bed = merged_chip_slop_bed.cat(blacklist)
    return tfs, positive_windows, y_positive, nonnegative_regions_bed
Example #25
def bootstrap_par(reads, smat_raw, B, test_c=0.01, nprocs=1):
    """
    Similarity correction using a bootstrapping procedure for more robust corrections and error
    estimates. Bootstrapping conducted in parallel.

    Args:
    reads -- [numpy.array (M,N)] array with mapping information; reads[m,n]==1,
             if read n mapped to species m.
    smat_raw -- mapping information for similarity matrix. species have same ordering as reads array
    B -- Number of bootstrap samples
    test_c -- For testing: treat species as not present, if estimated concentration is below test_c.
    nprocs -- Number of parallel bootstrap processes to perform.

    Return:
    [p_values, abundances, variances] -- list of floats
    
    """
    # M: Number of species, N: Number of reads
    M,N = reads.shape 

    resList = parmap.map(_boot_iteration, range(B), reads, smat_raw, test_c, B, M, N, processes=nprocs)

    # merging arrays (found, core, fails)
    found = np.concatenate( [x['found'] for x in resList] )
    corr = np.concatenate( [x['corr'] for x in resList] )
    fails = np.concatenate( [x['fails'] for x in resList] )    
    
    # calculations
    p_values = np.mean(fails, axis=0)
    abundances = np.mean(corr, axis=0)
    variances = np.var(corr, axis=0)
    return p_values, abundances, variances
Example #26
def build_HDF5(jpeg_dir, size=64):
    """
    Gather the data in a single HDF5 file.
    """

    # Put train data in HDF5
    hdf5_file = os.path.join(data_dir, "CelebA_%s_data.h5" % size)
    with h5py.File(hdf5_file, "w") as hfw:

            list_img = glob.glob(os.path.join(jpeg_dir, "*.jpg"))
            list_img = np.array(list_img)

            data_color = hfw.create_dataset("data",
                                            (0, 3, size, size),
                                            maxshape=(None, 3, size, size),
                                            dtype=np.uint8)

            num_files = len(list_img)
            chunk_size = 2000
            num_chunks = num_files // chunk_size
            arr_chunks = np.array_split(np.arange(num_files), num_chunks)

            for chunk_idx in tqdm(arr_chunks):

                list_img_path = list_img[chunk_idx].tolist()
                output = parmap.map(format_image, list_img_path, size, parallel=True)

                arr_img_color = np.concatenate(output, axis=0)

                # Resize HDF5 dataset
                data_color.resize(data_color.shape[0] + arr_img_color.shape[0], axis=0)

                data_color[-arr_img_color.shape[0]:] = arr_img_color.astype(np.uint8)
Example #27
    def preproce_db_210301(self):
        # /media/jeonghwan/HDD2/Dataset/clean_label/clean_cut6

        aud_path = '/media/jeonghwan/HDD2/Dataset/clean_label/clean_cut6/'

        npz_list = glob(aud_path + '*.npz')
        plt.figure(1)
        # for i, npz_ in enumerate(tqdm(npz_list)):
        #     npz = np.load(npz_, allow_pickle=True)
        #     aud = npz['aud']
        #     label = npz['label']
        #
        #     # downsample
        #     aud_re = librosa.resample(aud, 48000, 16000) # resampling
        #
        #     # label downsample
        #     label = label[::3]
        #     np.savez(self.save_path + npz_.split('/')[-1], aud=aud_re, label=label)
        #
        #
        #     # For checking
        #     # plt.subplot(2,1 , 1)
        #     # plt.plot(aud_re)
        #     # plt.subplot(2, 1, 2)
        #     # plt.plot(label[::3])
        #     # plt.show()
        #     # exit()

        _ = parmap.map(self.resample_aud_label,
                       npz_list,
                       pm_pbar=True,
                       pm_processes=24)
Example #28
    def on_receive(self, message):
        """
        :param message:
        :return:
        """

        try:
            W = message["W"]
            bounding_rect = message["bounding_rect"]
            colors = message["colors"]
            vector_field_is_visible = message["vector_field_is_visible"]

            W_colors = [(W[i], colors[i]) for i in xrange(len(vector_field_is_visible)) if vector_field_is_visible[i]]

            if not W_colors:
                return None

            images = parmap.map(vector_field_to_image, zip(W, colors), bounding_rect, pool=self.pool)

            image = reduce(alpha_composite, images) if len(W) > 1 else images[0]

            return image
        except Exception as e:
            print "Exception"
            print e
            return None
Example #29
def runAll(func, iterable, config):
    """Run func over every element on iterable in parallel.

    Not yet run or tested.

    Inputs:
    ----------
    func
	(A function) The top level function, e.g runOne(), below

    iterable
	(list, array, etc.) A list of values to operate on.

    config
	(Clipboard) A configuration clipboard.
    """

    count = multiprocessing.cpu_count() - 1
    p = pool.Pool(count)

    parallel = config.get('debug', False)

    with contextlib.closing(pool.Pool(count)) as p:
        out = parmap.map(runOne, iterable, config, pool=p, parallel=parallel)

    return out
Example #30
def main(args):
        modellist   = args.modellist
        testlist    = [line.strip() for line in open(args.testlist)]  # paths and file names
        outputPath  = args.outputPath
        keepPredict = args.keepPredict
        scale       = args.scale
        kind        = args.kind
        seg         = int(args.seg)

        modelbox = []
        model_name_list = []
        for line in open(modellist):
                modelfile = line.strip()
                #print "Loading...", modelfile
                model = joblib.load(modelfile)
                modelbox.append(model)
                # not sure whether the model type can be added to the list
        print "Loaded model", modellist

        if 'scale' in locals() and len(scale):
                (maxDict, minDict) = load_param(scale)

        # now do 'map' in parallel
        print 'Executing predict parmap:', len(testlist)
        if os.environ.get('PBS_NUM_PPN') is None:
                mapresult = [iterateFiles(file, outputPath, modelbox, keepPredict, kind, seg) for file in testlist]
        else:
                np = int(os.environ.get('PBS_NUM_PPN'))
                print '  np=', np
                mapresult = parmap.map(iterateFiles, testlist, outputPath, modelbox, keepPredict, kind, seg, processes=np)
        print 'Done!'
Example #31
def gen_grid_interp(dim,
                    zbounds=[-2.5, -0.25],
                    bound_dict={
                        'alpha': [-5, -1.5],
                        'mu': [-1.5, 0.75],
                        'sigma': [0.25, 1.0]
                    }):
    ### Modified to accommodate input function
    span_list = {}
    for key in bound_dict.keys():
        print(key)
        span_list[key] = np.arange(*bound_dict[key], 0.1)
        print(span_list[key])

    MESH = np.meshgrid(*[span_list[key] for key in span_list.keys()],
                       indexing='ij')

    pool_output = parmap.map(pool_function,
                             zip(*[GRID_ELE.flatten() for GRID_ELE in MESH]),
                             zbounds,
                             pm_processes=4)

    print([len(span_list[key]) for key in span_list.keys()])

    grid_interp = RegularGridInterpolator(
        [GRID_ELE.flatten() for GRID_ELE in MESH],
        np.array(pool_output).flatten().reshape(
            [len(span_list[key]) for key in span_list.keys()]))

    return grid_interp
Example #32
def run_split_parallel(ptps, labels, CONFIG, ptp_cut=5):

    all_units = np.unique(labels)

    new_labels = np.ones(len(ptps), 'int32') * -1

    n_processors = CONFIG.resources.n_processors
    if CONFIG.resources.multi_processing:
        units_in = []
        for j in range(n_processors):
            units_in.append(all_units[slice(j, len(all_units), n_processors)])
        results = parmap.map(run_split,
                             units_in,
                             ptps,
                             labels,
                             CONFIG,
                             ptp_cut,
                             processes=n_processors)
        n_labels = 0
        for rr in results:
            for rr2 in rr:
                ii_ = rr2[:, 0]
                lab_ = rr2[:, 1]
                new_labels[ii_] = lab_ + n_labels
                n_labels += len(np.unique(lab_))
    else:
        results = run_split(all_units, ptps, labels, CONFIG, ptp_cut)
        n_labels = 0
        for rr in results:
            ii_ = rr[:, 0]
            lab_ = rr[:, 1]
            new_labels[ii_] = lab_ + n_labels
            n_labels += len(np.unique(lab_))

    return new_labels
Example #33
    def generate_heuristics(self, model, min_cardinality=1, max_cardinality=1):
        """
        Generates heuristics over given feature cardinality
        model: fit logistic regression or a decision tree
        max_cardinality: max number of features each heuristic operates over
        """
        # have to make a dictionary?? or feature combinations here? or list of arrays?
        feature_combinations_final = []
        heuristics_final = []

        feature_length = 0
        for cardinality in range(min_cardinality, max_cardinality + 1):
            feature_combinations = self.generate_feature_combinations(cardinality)
            ####### single-core
            # heuristics = []
            # for i, comb in enumerate(feature_combinations):
            #     heuristics.append(self.fit_function(comb, model))

            ######## with parmap
            heuristics = parmap.map(self.fit_and_return, feature_combinations, model, pm_pbar=True)

            feature_combinations_final.append(feature_combinations)
            heuristics_final.append(heuristics)

        return heuristics_final, feature_combinations_final
Example #34
def parmap_batch_generator(data_total, endpoints_total, mins_dynamic,
                           scales_dynamic, max_n_step):
    time_series_all = []
    time_series_endpoint_all = []
    for p in range(len(data_total)):
        print(p)
        path = data_total[p]
        path_endpoint = endpoints_total[p]
        data_frame = pd.read_hdf(path).fillna(0)
        data_frame_endpoint = pd.read_hdf(path_endpoint).fillna(0)
        assert not data_frame.isnull().values.any(), "No NaNs allowed"
        assert not data_frame_endpoint.isnull().values.any(), "No NaNs allowed"
        patients = data_frame.patientunitstayid.unique()

        temp = parmap.map(get_patient_n, patients, data_frame,
                          data_frame_endpoint, max_n_step, mins_dynamic,
                          scales_dynamic)

        data = []
        labels = []
        for a in range(len(temp)):
            for b in range(len(temp[a][1])):
                labels.append(temp[a][1][b])
                data.append(temp[a][0][b])
        data = np.array(data)
        labels = np.array(labels)
        time_series_all.extend(data)
        time_series_endpoint_all.extend(labels)

    return time_series_all, time_series_endpoint_all
Example #35
def get_evoked_map(mouse):
    
    lofiles, lofilenames = get_file_list(base_dir, mouse)
    print lofilenames
    lop = get_distance_var(lofiles)

    all_frames = get_video_frames(lofiles)
    print "Alligning all video frames..."

    all_frames = parmap.starmap(shift_frames, zip(all_frames, lop))
    all_frames = np.asarray(all_frames, dtype=np.float32)
    print np.shape(all_frames)

    new_all_frames = parmap.map(process_frames_evoked, all_frames)
    
    all_frames = np.reshape(all_frames,
                            (all_frames.shape[0]*all_frames.shape[1],
                            all_frames.shape[2],
                            all_frames.shape[3])) 
    save_to_file("conc_RAW.raw", all_frames, np.float32)


    print "Creating array.."
    new_all_frames = np.asarray(new_all_frames, dtype=np.float32)
    print "Averaging together..."
    new_all_frames = np.mean(new_all_frames, axis=0)

    print np.shape(new_all_frames)


    save_to_file("evoked_trial_noBP_GSR.raw", new_all_frames, np.float32)
Example #36
def runAll(func, iterable, config):
    """Run func over every element on iterable in parallel.

    Not yet run or tested.

    Inputs:
    ----------
    func
	(A function) The top level function, e.g runOne(), below

    iterable
	(list, array, etc.) A list of values to operate on.

    config
	(Clipboard) A configuration clipboard.
    """

    count = multiprocessing.cpu_count() - 1
    p = pool.Pool(count)


    parallel = config.get('debug', False)

    with contextlib.closing(pool.Pool(count)) as p:
        out = parmap.map(runOne, iterable, config, pool=p, parallel=parallel)

    return out
Example #37
def build_HDF5(jpeg_dir, nb_channels, data_dir, size=256):
    """
    Gather the data in a single HDF5 file.
    """

    data_dir = os.path.join(data_dir, 'processed')

    # Put train data in HDF5
    file_name = os.path.basename(jpeg_dir.rstrip("/"))
    hdf5_file = os.path.join(data_dir, "%s_data.h5" % file_name)
    with h5py.File(hdf5_file, "w") as hfw:

        for dset_type in ["train", "test", "val"]:

            list_img = [
                img for img in Path(jpeg_dir).glob('%s/*.jpg' % dset_type)
            ]
            list_img = [str(img) for img in list_img]
            list_img.extend(list(Path(jpeg_dir).glob('%s/*.png' % dset_type)))
            list_img = list(map(str, list_img))
            list_img = np.array(list_img)

            data_full = hfw.create_dataset("%s_data_full" % dset_type,
                                           (0, nb_channels, size, size),
                                           maxshape=(None, 3, size, size),
                                           dtype=np.uint8)

            data_sketch = hfw.create_dataset("%s_data_sketch" % dset_type,
                                             (0, nb_channels, size, size),
                                             maxshape=(None, 3, size, size),
                                             dtype=np.uint8)

            num_files = len(list_img)
            chunk_size = 100
            num_chunks = num_files // chunk_size
            arr_chunks = np.array_split(np.arange(num_files), num_chunks)

            for chunk_idx in tqdm(arr_chunks):

                list_img_path = list_img[chunk_idx].tolist()
                output = parmap.map(format_image,
                                    list_img_path,
                                    size,
                                    nb_channels,
                                    pm_parallel=False)

                arr_img_full = np.concatenate([o[0] for o in output], axis=0)
                arr_img_sketch = np.concatenate([o[1] for o in output], axis=0)

                # Resize HDF5 dataset
                data_full.resize(data_full.shape[0] + arr_img_full.shape[0],
                                 axis=0)
                data_sketch.resize(data_sketch.shape[0] +
                                   arr_img_sketch.shape[0],
                                   axis=0)

                data_full[-arr_img_full.shape[0]:] = arr_img_full.astype(
                    np.uint8)
                data_sketch[-arr_img_sketch.shape[0]:] = arr_img_sketch.astype(
                    np.uint8)
Example #38
def main():
    data = np.loadtxt("./exampleTargets/C3/K2C3cat.txt", usecols=(0, ))

    cfg = pl.loadDefaultConfig()

    taskList = cfg['taskList']
    #    for i in range(len(taskList)):
    #        taskList[i] = "pl.%s" %(taskList[i])
    #    cfg['taskList'] = taskList

    cfg['taskList'] = taskList[:10]
    print cfg['taskList']

    count = multiprocessing.cpu_count() - 1
    p = pool.Pool(count)
    print count

    cfg['debug'] = False
    parallel = cfg.get('debug', False)
    parallel = False

    #Pool doesn't release threads even when it runs to completion.
    #Problem not related to exceptions being raised
    with contextlib.closing(pool.Pool(count)) as p:
        out = parmap.map(pl.runOne, data[1:3], cfg, parallel=parallel)
    p.join()
    p.close()

    return out
Example #39
def prepare_dataset(data_path,
                    subset=None,
                    path_to_save='./numpy_data',
                    processed_csv_path='./processed_dataset.csv',
                    resample_rate=None,
                    n_fft=2048,
                    hop_length=512,
                    slice_duration=2,
                    n_workers=1):
    print('hop_length = ', hop_length)
    mus = musdb.DB(root_dir=data_path)
    music_list = mus.load_mus_tracks(subsets=subset)
    print('Starting preparing dataset...')
    if not os.path.exists(path_to_save):
        os.makedirs(path_to_save)
    processed_csv = pd.DataFrame(columns=['mix'] +
                                 list(music_list[0].targets.keys()))
    # p = multiprocessing.Pool(6)
    rows = parmap.map(process_audio,
                      music_list,
                      processed_csv,
                      pm_pbar=True,
                      pm_processes=n_workers,
                      path_to_save=path_to_save,
                      n_fft=n_fft,
                      resample_rate=resample_rate,
                      hop_length=hop_length,
                      slice_duration=slice_duration)
    for r in rows:
        for n in r:
            processed_csv.loc[len(processed_csv)] = n

    processed_csv.to_csv(processed_csv_path, index=False)
Example #40
def GMMSel(coords, gmm, covar=None, sel_gmm=None, cutoff_nd=3., rng=np.random):
    # swiss cheese selection based on a GMM:
    # if within 1 sigma of any component: you're out!
    import multiprocessing, parmap
    n_chunks, chunksize = sel_gmm._mp_chunksize()
    inside = np.array(parmap.map(insideComponent, range(sel_gmm.K), sel_gmm, coords, covar, cutoff_nd, pm_chunksize=chunksize))
    return np.max(inside, axis=0)
Example #41
def get_distance_var(lof, width, height, frame_oi):
    
    filtered_frames=[]
    print('')
    print('Now in get_distance_var')
    print(lof)
    for f in lof:
        print('loop')
        print(f)
        frames=get_green_frames(f,width,height)
        print(type(frames))
        filtered_frames.append(filter2_test_j(frames[frame_oi,:,:]))

    print "Getting all the distances.."
    # Get all the distances using all videos as ref point, thus size of matrix is n^2
    list_of_ref = []
    for frame_ref in filtered_frames:
        list_of_positions = []
        res_trials = parmap.map(image_registration.chi2_shift, filtered_frames, frame_ref) 
        # res_trials is array of trials * [dx, dy, edx, edy]
        for res in res_trials:
            list_of_positions.append(Position(res[0], res[1]))
        #for frame in filtered_frames:
        #    dx, dy, edx, edy = image_registration.chi2_shift(frame_ref, frame)
        #    list_of_positions.append(Position(dx, dy))

        list_of_ref.append(list_of_positions)
    print "Finding the min..."
    list_of_positions = find_min_ref(list_of_ref)

    return list_of_positions
Example #42
def main():
	# Print M0 such that you are able to reconstruct the messages used at any node
	print("M0_1:")
	prints(M0_1)

	print("M0_2:")
	prints(M0_2)

	# Make room for 2^n nodes
	print("[!] Initializing hypercube")
	hc = [None for _ in range(2**DIM)]
	print("[+] Completed initializing hypercube")

	# Compute the MAC of each node in parallel and show a progress bar
	print("[!] Building hypercube...")
	hc = parmap.map(functools.partial(build_node, hc=hc), range(len(hc)), pm_pbar=True, pm_processes=8, pm_chunksize=100)
	print("[+] Completed building hypercube")	

	# Check for collisions in O(n)
	print("[!] Checking for full collisions...")
	D = defaultdict(list)
	for idx, mac in enumerate(hc):
		D[tuple(mac)].append(idx)
	D = {k: v for k, v in D.items() if len(v) > 1}

	if len(D.items()) == 0:
		print("[-] No collisions found")
	else:
		print("[+] Collisions found")
		for k, v in D.items():
			print("Nodes", v, "share the following MAC:")
			prints(k)
Example #43
 def test_map_without_parallel_timings(self):
     NUM_TASKS = 6
     items = range(NUM_TASKS)
     mytime = time.time()
     pfalse = parmap.map(_wait, items, pm_parallel=False)
     elapsed = time.time() - mytime
     self.assertTrue(elapsed >= TIME_PER_TEST*NUM_TASKS)
     self.assertEqual(pfalse, list(range(NUM_TASKS)))
Example #44
    def parallel(self, names, fileType='fasta', nprocs=1, **kwargs):
        """Running simulator using apply_async

        Args:
        names -- NameFile class with iter_names() method
        fileType -- sequence file format
        nprocs -- max number of parallel simulation calls
        kwargs -- passed to simulator

        Attribs added to each name instance in names:
        simReadsFile -- file name of simulated reads
        simReadsFileType -- file type (eg., 'fasta' or 'fastq')
        simReadsFileCount -- number of simulated reads

        Return:
        boolean on run success/fail
        """
        # making list of fasta file to provide simulator call
        fastaFiles = [name.get_fastaFile() for name in names.iter_names()]

        # setting kwargs
        new_simulator = partial(self, **kwargs)

        # calling simulator
        res = parmap.map(new_simulator, fastaFiles, processes=nprocs)

        # checking that simulated reads were created for all references; return 1 if no file
        for row in res:
            if row['simReadsFile'] is None or not os.path.isfile(row['simReadsFile']):
                return 1
            elif os.stat(row['simReadsFile'])[0] == 0:  # file size = 0
                return 1
        
        # converting reads to fasta if needed
        if fileType.lower() == 'fasta':
            for result in res:
                simFile = result['simReadsFile']
                fileType = result['simReadsFileType'].lower()
                if fileType != 'fasta':
                    fastaFile = os.path.splitext(simFile)[0] + '.fna'
                    SeqIO.convert(simFile, fileType, fastaFile, 'fasta')
                    result['simReadsFile'] = fastaFile
                    result['simReadsFileType'] = 'fasta'
                    
        # setting attribs in name instances                    
        for i,name in enumerate(names.iter_names()):
            # read file
            simReadsFile = res[i]['simReadsFile']
            name.set_simReadsFile(simReadsFile)
            # file type
            fileType = res[i]['simReadsFileType'].lower()
            name.set_simReadsFileType(fileType)
            # number of simulated reads            
            num_reads = len([True for i in SeqIO.parse(simReadsFile, fileType)])
            name.set_simReadsCount(num_reads)
            
        return 0
Example #45
def parallel_map(*args, processes = 1):
    """
    Wrapper function for 'parmap.map': Parallises the computations in 
    'map' form if required. If only one process is needed, computations 
    are performed serially
    """
    if processes == 1:
        return [args[0](element, *args[2:]) for element in args[1]]
    return parmap.map(*args, processes = processes)
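A quick usage sketch (math.sqrt and the process counts are arbitrary choices; note the wrapper forwards processes= straight to parmap.map, so it assumes a parmap version that accepts that keyword, as the wrapper itself does):

import math

serial = parallel_map(math.sqrt, range(10), processes=1)    # list-comprehension branch
parallel = parallel_map(math.sqrt, range(10), processes=4)  # parmap.map branch
assert serial == parallel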
Example #46
 def test_map_with_parallel_timings(self):
     NUM_TASKS = 6
     items = range(NUM_TASKS)
     mytime = time.time()
     ptrue = parmap.map(_wait, items, pm_processes=NUM_TASKS,
                        pm_parallel=True)
     elapsed = time.time() - mytime
     self.assertTrue(elapsed >= TIME_PER_TEST)
     self.assertTrue(elapsed < TIME_PER_TEST*(NUM_TASKS-1))
     self.assertEqual(ptrue, list(range(NUM_TASKS)))
Example #47
def hcluster(features, ed, cf, distance=CID):
    # cluster the rows of the "features" matrix
    distances = {}
    currentclustid = -1

    # clusters are initially just the individual rows
    clust = [cluster_node(array(features[i]), id=i)
             for i in range(len(features))]

    while len(clust) > 1:
        lowestpair = (0, 1)
        closest = distance(clust[0].id, clust[1].id, ed, cf)
        len1 = len(clust)
        # loop through every pair looking for the smallest distance
        '''for i in range(len(clust)):
            for j in range(i + 1, len(clust)):
            # distances is the cache of distance calculations
                if (clust[i].id, clust[j].id) not in distances:
                    distances[(clust[i].id, clust[j].id)] = distance(
                        clust[i].id, clust[j].id, ed, cf)

                d = distances[(clust[i].id, clust[j].id)]

                if d < closest:
                    closest = d
                    lowestpair = (i, j)'''

        args = [[clust, distances, 0, 0, len1/2, len1/2, 0, distance],
                [clust, distances, len1/2, len1/2, len1 - 1, len1 - 1, 0, distance],
                [clust, distances, len1 - 1, 0, len1/2, len1/2, 1, distance],
                [clust, distances, len1 - 1, 0, len1/2, len1/2, 2, distance]
                ]
        final = parmap.map(findMin, args)
        print final
        minindex = argmin([distances[clust[lowestpair[0]].id, clust[lowestpair[1]].id] for \
            lowestpair in final])

        lowestpair = final[minindex]

        # calculate the average of the two clusters
        mergevec = [(clust[lowestpair[0]].vec[i] + clust[lowestpair[1]].vec[i]) / 2.0
                    for i in range(len(clust[0].vec))]

        # create the new cluster
        newcluster = cluster_node(array(mergevec), left=clust[lowestpair[0]],
                                  right=clust[lowestpair[1]],
                                  distance=closest, id=currentclustid)

        # cluster ids that weren't in the original set are negative
        currentclustid -= 1
        del clust[lowestpair[1]]
        del clust[lowestpair[0]]
        clust.append(newcluster)

    return clust[0]
Example #48
File: train.py Project: psdh/SaTS
def calc(series):
    len1 = len(series)
    ser = []
    for i in range(len1):
        for j in range(i+1, len1):
            ser.append([series[i], series[j]])

    distances = parmap.map(calculateED, ser)

    print "len of distances: " + str(len(distances))

    return distances
Example #49
def generatePredictions(method, hlas, peptides, cpus=1, verbose=False):
    """Does not work because peptides is also an iterator...."""
    if verbose:
        """Create console handler and set level to debug"""
        logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(asctime)s:%(message)s')
        logging.info('HLA prediction initialized for %d HLA allele(s) using method %s on %d CPU(s)', len(hlas), method, cpus)

    if cpus > 1:
        result = parmap.map(predictHLA_mhctools, hlas, method, peptides, verbose, pool=Pool(processes=cpus))
    else:
        result = parmap.map(predictHLA_mhctools, hlas, method, peptides, verbose, parallel=False)

    """Remove None's"""
    outDf = pd.concat([r for r in result if not r is None], axis=0)

    """Take the log of the prediction if neccessary."""
    if outDf.affinity.max() > 100:
        outDf.loc[:, 'pred'] = np.log(outDf.affinity)

    if verbose:
        logging.info('Completed %d predictions (expected %d)', outDf.shape[0], len(hlas) * len(peptides))
    return outDf
Example #50
def calc_loss_arrays(fc, sc, component_resp_df, parallel_proc):

    # print("\nCalculating system response to hazard transfer parameters...")
    component_resp_dict = component_resp_df.to_dict()
    sys_output_dict = {k: {o: 0 for o in fc.network.out_node_list}
                       for k in sc.hazard_intensity_str}
    ids_comp_vs_haz = {p: np.zeros((sc.num_samples, fc.num_elements))
                       for p in sc.hazard_intensity_str}

    calculated_output_array = np.zeros((sc.num_samples, sc.num_hazard_pts))
    economic_loss_array = np.zeros_like(calculated_output_array)
    output_array_given_recovery = np.zeros(
        (sc.num_samples, sc.num_hazard_pts, sc.num_time_steps)
    )

    if parallel_proc:
        print('\nInitiating computation of loss arrays...')
        print(Fore.YELLOW + 'using parallel processing\n' + Fore.RESET)
        parallel_return = parmap.map(
            multiprocess_enabling_loop, range(len(sc.hazard_intensity_str)),
            sc.hazard_intensity_str, sc.num_hazard_pts, fc, sc
        )

        for idxPGA, _PGA in enumerate(sc.hazard_intensity_str):
            ids_comp_vs_haz[_PGA] = parallel_return[idxPGA][0]
            sys_output_dict[_PGA] = parallel_return[idxPGA][1]
            component_resp_dict[_PGA] = parallel_return[idxPGA][2]
            calculated_output_array[:, idxPGA] = parallel_return[idxPGA][3]
            economic_loss_array[:, idxPGA] = parallel_return[idxPGA][4]
            output_array_given_recovery[:, idxPGA, :] = \
                parallel_return[idxPGA][5]
    else:
        print('\nInitiating computation of loss arrays...')
        print(Fore.RED + 'not using parallel processing\n' + Fore.RESET)
        for idxPGA, _PGA in enumerate(sc.hazard_intensity_str):
            ids_comp_vs_haz[_PGA], \
            sys_output_dict[_PGA], \
            component_resp_dict[_PGA], \
            calculated_output_array[:, idxPGA], \
            economic_loss_array[:, idxPGA], \
            output_array_given_recovery[:, idxPGA, :] = \
                multiprocess_enabling_loop(
                    idxPGA=idxPGA, _PGA_dummy=_PGA,
                    nPGA=sc.num_hazard_pts, fc=fc, sc=sc)

    return ids_comp_vs_haz, \
           sys_output_dict, \
           component_resp_dict, \
           calculated_output_array, \
           economic_loss_array, \
           output_array_given_recovery
Example #51
def multiprocess(f, iterable, *args, **kwargs):
    """
    Map an iterable to a function. Default key function chunks iterable by
    1000s.

    :param f: function
    :param iterable: any iterable where each item is sent to f
    :param *args: arguments passed to mapped function
    :param **kwargs: additional arguments for parmap.map
    """
    chunksize = kwargs.pop('chunksize', 1000)
    key = kwargs.pop('key', lambda k, l=count(): next(l)//chunksize)
    for k, g in groupby(iterable, key=key):
        yield parmap.map(f, g, *args, **kwargs)
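A sketch of how this generator might be consumed (the square worker and the chunk size are illustrative); each yielded item is the parmap.map result for one chunk of the input:

def square(x):
    return x * x

results = []
for chunk_result in multiprocess(square, range(5000), chunksize=1000):
    results.extend(chunk_result)  # chunks come back in input order

assert results == [x * x for x in range(5000)]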
Example #52
def get_correlation_map(seed_x, seed_y, frames):
    seed_pixel = np.asarray(frames[:, seed_x, seed_y], dtype=np.float32)
    
    print np.shape(seed_pixel)
    # Reshape into time and space
    frames = np.reshape(frames, (frames.shape[0], width*height))
    print np.shape(frames)
    print 'Getting correlation... x=', seed_x, ", y=", seed_y

    correlation_map = parmap.map(corr, frames.T, seed_pixel)
    correlation_map = np.asarray(correlation_map, dtype=np.float32)
    correlation_map = np.reshape(correlation_map, (width, height))
    print np.shape(correlation_map)

    return correlation_map
Example #53
def build_HDF5(jpeg_dir, nb_channels, size=256):
    """
    Gather the data in a single HDF5 file.
    """

    # Put train data in HDF5
    file_name = os.path.basename(jpeg_dir.rstrip("/"))
    hdf5_file = os.path.join(data_dir, "%s_data.h5" % file_name)
    with h5py.File(hdf5_file, "w") as hfw:

        for dset_type in ["train", "test", "val"]:

            list_img = list(Path(jpeg_dir).glob('%s/*.jpg' % dset_type))
            list_img.extend(list(Path(jpeg_dir).glob('%s/*.png' % dset_type)))
            list_img = map(str, list_img)
            list_img = np.array(list_img)

            data_full = hfw.create_dataset("%s_data_full" % dset_type,
                                           (0, nb_channels, size, size),
                                           maxshape=(None, 3, size, size),
                                           dtype=np.uint8)

            data_sketch = hfw.create_dataset("%s_data_sketch" % dset_type,
                                             (0, nb_channels, size, size),
                                             maxshape=(None, 3, size, size),
                                             dtype=np.uint8)

            num_files = len(list_img)
            chunk_size = 100
            num_chunks = num_files // chunk_size
            arr_chunks = np.array_split(np.arange(num_files), num_chunks)

            for chunk_idx in tqdm(arr_chunks):

                list_img_path = list_img[chunk_idx].tolist()
                output = parmap.map(format_image, list_img_path, size, nb_channels, parallel=False)

                arr_img_full = np.concatenate([o[0] for o in output], axis=0)
                arr_img_sketch = np.concatenate([o[1] for o in output], axis=0)

                # Resize HDF5 dataset
                data_full.resize(data_full.shape[0] + arr_img_full.shape[0], axis=0)
                data_sketch.resize(data_sketch.shape[0] + arr_img_sketch.shape[0], axis=0)

                data_full[-arr_img_full.shape[0]:] = arr_img_full.astype(np.uint8)
                data_sketch[-arr_img_sketch.shape[0]:] = arr_img_sketch.astype(np.uint8)
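In the parmap.map call above, the extra positional arguments are forwarded to every invocation, so each worker effectively runs format_image(img_path, size, nb_channels). A tiny self-contained illustration of that forwarding, using a hypothetical describe function in place of format_image:

import parmap


def describe(path, size, nb_channels):
    return "%s -> %dx%d, %d channel(s)" % (path, size, size, nb_channels)

# Each list item becomes the first argument; 256 and 3 are appended to every call.
print(parmap.map(describe, ["a.jpg", "b.png"], 256, 3, pm_parallel=False))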
Example #54
0
def get_reads_in_intervals(bam, intervals, strand_specific=False):
    """
    Counts reads in a iterable holding strings
    representing genomic intervals of the type chrom:start-end.
    """
    # count, create dataframe
    coverage = parmap.map(
        coverage_single,
        intervals.values(),
        bam,
        strand_specific=strand_specific,
        parallel=True)

    if not strand_specific:
        coverage = np.vstack(coverage)
    else:
        coverage = (np.vstack([x[0] for x in coverage]), np.vstack([x[1] for x in coverage]))
    return coverage
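Note that intervals is expected to be dict-like (its .values() are passed to parmap.map), with values of the form chrom:start-end, and that coverage_single is assumed to accept (interval, bam, strand_specific=...). A hypothetical call might look like:

# Hypothetical input: interval names mapped to chrom:start-end strings.
intervals = {
    "peak_1": "chr1:1000-2000",
    "peak_2": "chr2:5000-5600",
}
# coverage = get_reads_in_intervals("sample.bam", intervals)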
Example #55
0
    def get_correlation_map(self, seed_x, seed_y, frames):
        seed_pixel = np.asarray(frames[:, seed_x, seed_y], dtype=np.float32)
        
        print(np.shape(seed_pixel))
        # Reshape into time and space
        frames = np.reshape(frames, (frames.shape[0], width*height))
        print(np.shape(frames))
        #correlation_map = []
        print('Getting correlation...')
        #correlation_map = Parallel(n_jobs=4, backend="threading")(delayed(corr)(pixel, seed_pixel) for pixel in frames.T)
        #correlation_map = []
        #for i in range(frames.shape[-1]):
        #    correlation_map.append(pearsonr(frames[:, i], seed_pixel)[0])
        correlation_map = parmap.map(corr, frames.T, seed_pixel)
        correlation_map = np.asarray(correlation_map, dtype=np.float32)
        correlation_map = np.reshape(correlation_map, (width, height))
        print(np.shape(correlation_map))

        return correlation_map
Example #56
0
def return_answer(text):
    print('searching...')
    global an1
#arr = np.array(qvectors)
#qvector = arr.sum(axis = 0)/len(arr)
    qvectors = calculate_qvectors(text)
#    print qvectors
#    from sklearn.metrics.pairwise import cosine_similarity
#    an2 = []
    an2 = parmap.map(calculate_sim, answervectors[:35000], qvectors, pool=Pool(12))
#    print an2
#    pool.close()
#    pool.join()
    try:
        qq = sorted(an2, key=lambda x: x[1], reverse=True)
        ans = []
        for t in qq[0:5]:
            ans.append(sents[t[0]])
    except IndexError:
        ans = 'error'
    return ans
Example #57
0
 def getPage(page):
     print('Fetching watches list of user %s (page %d)...' % (uid, page))
     global s, header
     url = 'http://weibo.cn/%s/follow?page=%d'  % (uid, page)
     max_num_per_sec(1)
     resp = s.get(url, headers=header)
     if resp.status_code != 200: raise Exception("%d - status code err" % resp.status_code)
     soup = BeautifulSoup.BeautifulSoup(resp.text)
     try:
         trs = [i.find('tr') for i in soup.findAll('table')]
     except:
         print(soup)
         debug_save(resp.content)
         print('GET %s error, cookies: ' % url)
         print(s.cookies.get_dict())
         exit()
     res = parmap.map(parseTr, trs)
     pagelist_div = soup.find(id='pagelist')
     if pagelist_div is not None:
         totalpage = int(find_inner_text(pagelist_div.find('div').text, '/',u'\u9875'))
     else:
         totalpage = 1
     return res, totalpage
def build_HDF5(size=64):
    """
    Gather the data in a single HDF5 file.
    """

    # Read evaluation file, build it if it does not exist
    # In evaluation status, "0" represents training image, "1" represents
    # validation image, "2" represents testing image;
    d_partition = {}
    with open(os.path.join(raw_dir, "Eval/list_eval_partition.txt"), "r") as f:
        lines = f.readlines()
        for celeb in lines:
            celeb = celeb.rstrip().split()
            img = celeb[0]
            attrs = int(celeb[1])
            d_partition[img] = attrs
    with open(os.path.join(data_dir, "d_partition.pickle"), "wb") as fd:  # pickle needs binary mode
        pickle.dump(d_partition, fd)

    # Put train data in HDF5
    hdf5_file = os.path.join(data_dir, "CelebA_%s_data.h5" % size)
    with h5py.File(hdf5_file, "w") as hfw:

        for dset_idx, dset_type in enumerate(["training", "validation", "test"]):

            list_img = []
            for img in d_partition.keys():
                if d_partition[img] == dset_idx:
                    list_img.append(os.path.join(raw_dir, "img_align_celeba", img))
            list_img = np.array(list_img)

            data_color = hfw.create_dataset("%s_color_data" % dset_type,
                                            (0, 3, size, size),
                                            maxshape=(None, 3, size, size),
                                            dtype=np.uint8)

            data_lab = hfw.create_dataset("%s_lab_data" % dset_type,
                                          (0, 3, size, size),
                                          maxshape=(None, 3, size, size),
                                          dtype=np.float64)

            data_black = hfw.create_dataset("%s_black_data" % dset_type,
                                            (0, 1, size, size),
                                            maxshape=(None, 1, size, size),
                                            dtype=np.uint8)

            num_files = len(list_img)
            chunk_size = 1000
            num_chunks = max(1, num_files // chunk_size)  # integer number of chunks
            arr_chunks = np.array_split(np.arange(num_files), num_chunks)

            for chunk_idx in tqdm(arr_chunks):

                list_img_path = list_img[chunk_idx].tolist()
                output = parmap.map(format_image, list_img_path, size, parallel=True)

                arr_img_color = np.vstack([o[0] for o in output if o[0].shape[0] > 0])
                arr_img_lab = np.vstack([o[1] for o in output if o[0].shape[0] > 0])
                arr_img_black = np.vstack([o[2] for o in output if o[0].shape[0] > 0])

                # Resize HDF5 dataset
                data_color.resize(data_color.shape[0] + arr_img_color.shape[0], axis=0)
                data_lab.resize(data_lab.shape[0] + arr_img_lab.shape[0], axis=0)
                data_black.resize(data_black.shape[0] + arr_img_black.shape[0], axis=0)

                data_color[-arr_img_color.shape[0]:] = arr_img_color.astype(np.uint8)
                data_lab[-arr_img_lab.shape[0]:] = arr_img_lab.astype(np.float64)
                data_black[-arr_img_black.shape[0]:] = arr_img_black.astype(np.uint8)
def align_frames(mouse, dir, freq):
    lofiles, lofilenames = get_file_list(dir+"Videos/", mouse)
    print(lofilenames)
    lop = get_distance_var(lofiles)
    
    all_frames = np.asarray(get_video_frames(lofiles), dtype=np.uint8)
    print "Alligning all video frames..."

    all_frames = parmap.starmap(shift_frames, zip(all_frames, lop))
##    for i in range(len(lop)):
##        for frame in all_frames[i]:
##            frame = image_registration.fft_tools.shift2d(frame, lop[i].dx, lop[i].dy)

    print(np.shape(all_frames))

    count = 0
    new_all_frames = parmap.map(process_frames, all_frames, freq, mouse, dir)
    '''
    for frames in all_frames:
        print np.shape(frames)
        save_to_file("Green/"+lofilenames[count][:-4]+"_aligned.raw", frames, np.float32)

        print "Calculating mean..."
        avg_pre_filt = calculate_avg(frames)

        print "Temporal filter..."
        frames = cheby_filter(frames)
        frames += avg_pre_filt
        save_to_file("Green/Cheby/"+lofilenames[count][:-4]+"_BPFilter_0.1-1Hz.raw", frames, np.float32)


        print "Calculating DF/F0..."
        frames = calculate_df_f0(frames)
        save_to_file("Green/DFF/"+lofilenames[count][:-4]+"_DFF.raw", frames, np.float32)

        print "Applying MASKED GSR..."
        #frames = gsr(frames)
        frames = masked_gsr(frames, save_dir+"202_mask.raw")
        save_to_file("Green/GSR/"+lofilenames[count][:-4]+"_GSR.raw", frames, np.float32)


        print "Getting SD map..."
        sd = standard_deviation(frames)
        save_to_file("Green/SD_maps/"+lofilenames[count][:-4]+"_SD.raw", frames, np.float32)

        new_all_frames.append(frames)
        count += 1
    '''
    print "Creating array..."
    new_all_frames = np.asarray(new_all_frames, dtype=np.float32)
    all_frames = np.asarray(all_frames, dtype=np.float32)
    
    print "Joining Files..."
    new_all_frames = np.reshape(new_all_frames,
                            (new_all_frames.shape[0]*new_all_frames.shape[1],
                            new_all_frames.shape[2],
                            new_all_frames.shape[3]))
    all_frames = np.reshape(all_frames,
                            (all_frames.shape[0]*all_frames.shape[1],
                            all_frames.shape[2],
                            all_frames.shape[3]))

    print "Shapes: "
    print np.shape(all_frames)
    print np.shape(new_all_frames)

    where_are_NaNs = np.isnan(new_all_frames)
    new_all_frames[where_are_NaNs] = 0

    save_to_file("FULL_conc.raw", new_all_frames, np.float32)
    save_to_file("conc_RAW.raw", all_frames, np.float32)
    sd = standard_deviation(new_all_frames)
    save_to_file("FULL_SD.raw", sd, np.float32)

    print "Displaying correlation map..."
    mapper = CorrelationMapDisplayer(new_all_frames)
    mapper.display('spectral', -0.3, 1.0)
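align_frames relies on parmap.starmap, which unpacks each tuple of the zipped sequence into positional arguments (here a movie and its shift parameters). A minimal illustration of that unpacking with a toy function rather than the real shift_frames, assuming a parmap version that accepts pm_parallel:

import parmap


def shift(frame_value, offset):
    return frame_value + offset

# Equivalent to [shift(0, 10), shift(1, 20), shift(2, 30)] -> [10, 21, 32]
print(parmap.starmap(shift, zip([0, 1, 2], [10, 20, 30]), pm_parallel=False))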
Example #60
0
def test_eval_ned_baseline(aph_testset_dataframe, aph_test_ann_files):
    """TODO."""
    ann_dir, ann_files = aph_test_ann_files
    testset_gold_df = aph_testset_dataframe

    logger.info(
        tabulate(
            testset_gold_df.head(20)[["type", "surface", "scope", "urn"]]
        )
    )

    kb_cfg_file = pkg_resources.resource_filename(
        'knowledge_base',
        'config/virtuoso_local.ini'
    )
    kb = KnowledgeBase(kb_cfg_file)
    """
    kb_data = {
            "author_names": kb.author_names
            , "author_abbreviations": kb.author_abbreviations
            , "work_titles": kb.work_titles
            , "work_abbreviations": kb.work_abbreviations
            }

    with codecs.open("citation_extractor/data/pickles/kb_data.pkl","wb") as pickle_file:
        pickle.dump(kb_data, pickle_file)
    """

    with codecs.open("citation_extractor/data/pickles/kb_data.pkl","rb") as pickle_file:
        kb_data = pickle.load(pickle_file)

    cms = {}

    ##############################
    # Test 1: default parameters #
    ##############################

    cms["cm1"] = CitationMatcher(
        kb,
        fuzzy_matching_entities=False,
        fuzzy_matching_relations=False,
        **kb_data
    )

    ##############################
    # Test 2: best parameters    #
    ##############################
    cms["cm2"] = CitationMatcher(
        kb,
        fuzzy_matching_entities=True,
        fuzzy_matching_relations=True,
        min_distance_entities=4,
        max_distance_entities=7,
        distance_relations=4,
        **kb_data
    )

    """

    #####################################
    # Test 3: alternative parameters    #
    #####################################

    cms["cm3"] = CitationMatcher(kb
                        , fuzzy_matching_entities=True
                        , fuzzy_matching_relations=False
                        , min_distance_entities=4
                        , max_distance_entities=7)
    """

    comp_evaluation = []
    comp_accuracy_by_type = []

    # For each citation matcher, disambiguate the records in the test set,
    # carry out the evaluation, and store the results in two temporary lists
    # (to be turned into two dataframes later on).
    for key in sorted(cms.keys()):
        cm = cms[key]
        testset_target_df = testset_gold_df.copy()

        # run the parallel processing of records
        results = parmap.map(
            _pprocess,
            ((n, row[0], row[1]) for n, row in enumerate(testset_target_df.iterrows())),
            cm
        )

        # collect the results and update the dataframe
        for instance_id, urn in results:
            testset_target_df.loc[instance_id, "urn_clean"] = urn  # avoid chained indexing

        # save pickle for later
        #testset_target_df.to_pickle("citation_extractor/data/pickles/test_target_dataframe_%s.pkl" % key)

        scores, accuracy_by_type, error_types, errors = evaluate_ned(testset_gold_df, ann_dir, testset_target_df, strict=True)

        # aggregate and format the evaluation measure already with percentages
        scores = {score_key: "%.2f%%" % (scores[score_key]*100) for score_key in scores}
        scores["id"] = key
        comp_evaluation.append(scores)

        # aggregate and format the accuracy by type already with percentages
        accuracy = {type_key: "%.2f%%" % (accuracy_by_type[type_key]*100) for type_key in accuracy_by_type}
        accuracy["id"] = key
        comp_accuracy_by_type.append(accuracy)

    comp_evaluation_df = pd.DataFrame(comp_evaluation, index=[score["id"] for score in comp_evaluation])
    del comp_evaluation_df["id"] # we don't need it twice (already in the index)

    comp_accuracy_by_type_df = pd.DataFrame(comp_accuracy_by_type, index=[accuracy["id"] for accuracy in comp_accuracy_by_type])
    del comp_accuracy_by_type_df["id"] # we don't need it twice (already in the index)

    logger.info("\n" + tabulate(comp_evaluation_df, headers=comp_evaluation_df.columns))
    logger.info("\n" + tabulate(comp_accuracy_by_type_df, headers=comp_accuracy_by_type_df.columns))
    logger.info("\n" + "\n".join(["%s: %s" % (key, cms[key].settings) for key in cms]))