Code example #1
import compress_pickle as cp  # assumed import: the snippet calls cp.dump


def save_model(model, model_filepath):
    """Save the given model to the file at model_filepath."""

    cp.dump(model,
            model_filepath,
            compression='lzma',
            set_default_extension=False)
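A minimal usage sketch (my addition, not from the original project), continuing the snippet above; the model object and file name are placeholders:

# Hypothetical usage: any picklable object works as the "model" here.
model = {"weights": [0.1, 0.2, 0.3]}
save_model(model, "model.lzma")

# Reading it back mirrors the dump call's settings.
restored = cp.load("model.lzma",
                   compression="lzma",
                   set_default_extension=False)
assert restored == model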
Code example #2
    def _get_snp_classifications(self, genus):
        if genus == 'Pocillopora':
            # First check to see if the cached version exists
            snp_cache_dir = os.path.join(self.input_dir_18s,
                                         'snp_classifications',
                                         f'poc_snp_class_df.p.bz')
        elif genus == 'Porites':
            snp_cache_dir = os.path.join(self.input_dir_18s,
                                         'snp_classifications',
                                         f'por_snp_class_df.p.bz')

        if os.path.exists(snp_cache_dir):
            return compress_pickle.load(snp_cache_dir)
        else:
            # Need to create it from scratch
            if genus == 'Pocillopora':
                raw_snp_class_path = os.path.join(
                    self.input_dir_18s, 'snp_classifications',
                    f'POC_SNP_classifications.csv')
            elif genus == 'Porites':
                raw_snp_class_path = os.path.join(
                    self.input_dir_18s, 'snp_classifications',
                    f'POR_SNP_classifications.csv')

            snp_class_df = pd.read_csv(raw_snp_class_path, index_col=0)
            snp_class_df.index = self._convert_index_to_sample_ids(
                snp_class_df.index)
            snp_class_df.dropna(inplace=True)
            snp_class_df.columns = ['label']
            compress_pickle.dump(snp_class_df, snp_cache_dir)
            return snp_class_df
Code example #3
def parse_information_files(list_of_enzyme_commission_numbers,
                            list_of_databases, cpus):
    output_path = "data/parsed_raw/parsed_dictionary_of_raw_data.gzip"
    if not os.path.exists(output_path):
        print("Creating information compressed file. Saved at {}".format(
            output_path))
        work_list = []
        for enzyme in list_of_enzyme_commission_numbers:
            for database in list_of_databases:
                file_path = "data/raw/{}_{}_raw.txt".format(enzyme, database)
                work_list.append((file_path, ))
        result_list = run_multiprocessing.run_mp(work_list, cpus, process_file)
        processed_results = split_results(result_list)
        dump(processed_results,
             output_path,
             compression="gzip",
             set_default_extension=False)
    else:
        print("Information file have been found at {}. Loading it.".format(
            output_path))
        processed_results = load(output_path,
                                 compression="gzip",
                                 set_default_extension=False)
        print("File loaded")
    return processed_results
Code example #4
    def __init__(self, kernel, machine, cores=1):
        """Initialize cache simulation based predictor from kernel and machine object."""
        CachePredictor.__init__(self, kernel, machine, cores)
        if isinstance(kernel, KernelCode):
            # Make use of caching for symbolic LC representation:
            file_name = 'CSIM_analysis.pickle.lzma'
            file_path = kernel.get_intermediate_location(
                file_name,
                machine_and_compiler_dependent=False,
                other_dependencies=[str(cores)] +
                [str(t) for t in self.kernel.constants.items()])
            lock_mode, lock_fp = kernel.lock_intermediate(file_path)
            if lock_mode == fcntl.LOCK_SH:
                # use cache
                cache = compress_pickle.load(file_path)
                lock_fp.close()  # release lock
                self.first_dim_factor = cache['first_dim_factor']
                self.stats = cache['stats']
            else:  # lock_mode == fcntl.LOCK_EX
                # needs update
                self.simulate()
                compress_pickle.dump(
                    {
                        'first_dim_factor': self.first_dim_factor,
                        'stats': self.stats
                    }, file_path)
                lock_fp.close()  # release lock
        else:
            # No caching support without filename for kernel code
            self.simulate()
Code example #5
def test_dump_load(dump_load):
    (
        message,
        path,
        compression,
        set_default_extension,
        optimize,
        expected_file,
        expected_fail,
    ) = dump_load
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        if expected_fail is None:
            dump(
                message,
                path,
                compression,
                set_default_extension=set_default_extension,
                optimize=optimize,
            )
            loaded_message = load(
                path, compression, set_default_extension=set_default_extension
            )
            assert loaded_message == message
        else:
            with pytest.raises(expected_fail):
                dump(
                    message,
                    path,
                    compression,
                    set_default_extension=set_default_extension,
                    optimize=optimize,
                )
            with pytest.raises(expected_fail):
                load(path, compression, set_default_extension=set_default_extension)
Code example #6
    def __init__(self, kernel, machine, cores=1, symbolic=False):
        """Initialize layer condition based predictor from kernel and machine object."""
        CachePredictor.__init__(self, kernel, machine, cores=cores)
        if isinstance(kernel, KernelCode):
            # Make use of caching for symbolic LC representation:
            file_name = 'LC_analysis.pickle.lzma'
            file_path = kernel.get_intermediate_location(
                file_name,
                machine_and_compiler_dependent=False,
                other_dependencies=[str(cores)])
            lock_mode, lock_fp = kernel.lock_intermediate(file_path)
            if lock_mode == fcntl.LOCK_SH:
                # use cache
                self.results = compress_pickle.load(file_path)
                lock_fp.close()  # release lock
            else:  # lock_mode == fcntl.LOCK_EX
                # needs update
                self.build_symbolic_LCs()
                compress_pickle.dump(self.results, file_path)
                lock_fp.close()  # release lock
        else:
            # No caching support without filename for kernel code
            self.build_symbolic_LCs()

        if not symbolic:
            self.desymbolize()
Code example #7
File: remove.py  Project: pepamengual/UEP_PPI
def remove_training_data():
    from compress_pickle import load

    training_data_path = "/home/pepamengual/UEP/trained_model/UEP_trained_model_4"
    skempi_data_path = "/home/pepamengual/UEP/trained_model/substracted_4"
    substracted_model = {}
    training_data = load(training_data_path,
                         compression="lzma",
                         set_default_extension=False)
    skempi_data = load(skempi_data_path,
                       compression="lzma",
                       set_default_extension=False)

    for environment, amino_acid_dict in training_data.items():
        for amino_acid, counts in amino_acid_dict.items():
            if environment in skempi_data and amino_acid in skempi_data[
                    environment]:
                substract = counts - skempi_data[environment][amino_acid]
                substracted_model.setdefault(environment, {}).setdefault(
                    amino_acid, substract)
            else:
                substracted_model.setdefault(environment, {}).setdefault(
                    amino_acid, counts)
    from compress_pickle import dump

    dump(substracted_model, "substracted_def_4", compression="lzma")
Code example #8
def test_dump_load_on_filestreams(simple_dump_and_remove):
    path, compression, message, optimize = simple_dump_and_remove
    read_mode = "rb"  # get_compression_read_mode(compression)
    write_mode = "wb"  # get_compression_write_mode(compression)
    with open(path, write_mode) as f:
        dump(message, f, compression=compression, optimize=optimize)
    with open(path, read_mode) as f:
        raw_content = f.read()
        f.seek(0)
        loaded_message = load(f, compression=compression)
    assert loaded_message == message
    os.remove(path)
    dump(
        message,
        path,
        compression=compression,
        set_default_extension=False,
        optimize=optimize,
    )
    with open(path, read_mode) as f:
        benchmark = f.read()
    # zipfile compression stores the data in a zip archive. The archive then
    # contains a file with the data. Said file's mtime will always be
    # different between the two dump calls, so we skip the following assertion
    if compression != "zipfile":
        assert raw_content == benchmark
Code example #9
File: lcp.py  Project: ericosur/ericosur-snippet
    def save_pickle_impl(self):
        '''
        save pvalues into pickle file
        overrides class StorePrime::save_pickle_impl()
        '''
        # compress_pickle.dump(self.pvalues, self.pfile, compression="lzma")
        compress_pickle.dump(self.pvalues, self.pfile)
Code example #10
def dump(obj,
         filename,
         default_compression=DEFAULT_PICKLE_COMPRESSION,
         directory=RESULT_DIRECTORY,
         use_compression=PICKLE_COMPRESSION,
         save_full_object=SAVE_FULL_OBJECT):
    filename = str(filename)
    if not os.path.isabs(filename):
        filename = os.path.join(directory, filename)

    try:
        os.makedirs(os.path.dirname(filename))
    except OSError:
        pass

    if dill is not None and save_full_object:
        string_buff = dill.dumps(obj, recurse=True)
        obj_to_save = {'dill': True, 'obj': string_buff}
    else:
        obj_to_save = obj

    if compress_pickle is not None and use_compression:
        if os.path.splitext(filename)[0] == filename:
            filename = '.'.join([filename, default_compression])
        compress_pickle.dump(obj_to_save, filename)
    else:
        if os.path.splitext(filename)[0] == filename:
            filename = '.'.join([filename, use_compression])
        with open(filename, 'wb') as f:
            pickle.dump(obj_to_save, f)

    return filename
Code example #11
    def _get_hard_and_rel_sub_dicts(self, sample_names):
        if os.path.isfile(
                os.path.join(self.cache_dir, 'hard_sub_sample_dict.p.bz')):
            if os.path.isfile(
                    os.path.join(self.cache_dir, 'rel_sub_sample_dict.p.bz')):
                return compress_pickle.load(
                    os.path.join(
                        self.cache_dir,
                        'hard_sub_sample_dict.p.bz')), compress_pickle.load(
                            os.path.join(self.cache_dir,
                                         'rel_sub_sample_dict.p.bz'))

        hard_sub_sample_dict = {}
        rel_sub_sample_dict = {}
        count = 0
        tot_samples = len(sample_names)
        for sample_name in sample_names:
            count += 1
            sys.stdout.write(f'\r{sample_name}: {count}/{tot_samples}')
            abund_list = self.absolute_consolidated_abundance_dict[sample_name]
            if sum(abund_list) < 10000:
                continue

            # Make a redundant list of the seqs
            non_z = []
            for i, abund in enumerate(abund_list):
                if abund > 0:
                    non_z.append(i)

            redundant_list = []
            # prob_list = []
            tot = sum(abund_list)
            for i in non_z:
                seq = self.ordered_seq_names[i]
                abund = abund_list[i]
                # prob = abund/tot
                redundant_list.extend([seq for _ in range(abund)])
                # prob_list.extend([prob for _ in range(abund)])

            hard_sub_sample_list = np.random.choice(redundant_list,
                                                    10000,
                                                    replace=False)
            hard_abunds_dict = dict(Counter(hard_sub_sample_list))
            hard_sub_sample_dict[sample_name] = hard_abunds_dict

            # For soft
            norm_abund_dict = {
                self.ordered_seq_names[i]: int((abund_list[i] / tot) * 100)
                for i in non_z if int((abund_list[i] / tot) * 10000) > 0
            }
            rel_sub_sample_dict[sample_name] = norm_abund_dict

        compress_pickle.dump(
            hard_sub_sample_dict,
            os.path.join(self.cache_dir, 'hard_sub_sample_dict.p.bz'))
        compress_pickle.dump(
            rel_sub_sample_dict,
            os.path.join(self.cache_dir, 'rel_sub_sample_dict.p.bz'))
        return hard_sub_sample_dict, rel_sub_sample_dict
Code example #12
def pickle_to_str(pkl_path: str = None, obj=None):
    if obj is None:
        s = base64.b64encode(open(pkl_path, "rb").read()).decode("ascii")
    else:
        dump(obj, "temp.pkl")
        s = base64.b64encode(open("temp.pkl", "rb").read()).decode("ascii")

    return s
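A hedged round-trip sketch (my addition, not from the original project): assuming `dump` above is `compress_pickle.dump`, the temp.pkl file is a plain pickle (compression inferred from the .pkl extension), so the base64 string can be decoded back as below; `str_to_pickle` is a hypothetical helper name.

import base64
import pickle


def str_to_pickle(s: str):
    # Hypothetical inverse of pickle_to_str for the uncompressed ".pkl" case.
    return pickle.loads(base64.b64decode(s.encode("ascii")))


obj = {"answer": 42}
assert str_to_pickle(pickle_to_str(obj=obj)) == obj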
Code example #13
def test_dump_fails_on_unhandled_compression(wrong_compressions):
    with pytest.raises(ValueError):
        dump(
            1,
            "test_path.pkl",
            compression=wrong_compressions,
            set_default_extension=False,
        )
Code example #14
def save_fixed_frames_dict(frames_dict: Dict[str, Dict[int,
                                                       List[numpy.ndarray]]],
                           save_path_without_extension: str):
    folder = os.path.dirname(save_path_without_extension)
    if not os.path.exists(folder):
        os.makedirs(folder)
    with open(save_path_without_extension + "_frames_dict.xz", "wb") as file:
        compress_pickle.dump(frames_dict, file, compression="lzma")
Code example #15
def create_training_dataset(english_tokens,
                            german_tokens,
                            max_samples=None,
                            sample_length=100,
                            validation_split=None,
                            save_dataset=True,
                            save_interval=1000,
                            save_dir=None,
                            save_dir_validation=None,
                            save_compression='zipfile',
                            tqdm=None):
    max_samples = min(max_samples or len(english_tokens), len(english_tokens), len(german_tokens))
    # save_interval = min(save_interval, len(english_tokens), len(german_tokens))

    if tqdm is None:
        tqdm = get_tqdm()

    assert len(english_tokens) == len(german_tokens), \
        f'unexpected data mismatch for english={len(english_tokens)}, german={len(german_tokens)}'
    assert isinstance(english_tokens, list), \
        f'unexpected format received: received={type(english_tokens)}, expected=<list>'

    if save_dataset:
        os.makedirs(save_dir, exist_ok=True)
        if save_dir_validation is not None:
            os.makedirs(save_dir_validation, exist_ok=True)
            assert validation_split is not None, \
                f'provide a validation split (fraction of whole dataset created) when using `save_dir_validation=True`'
            validation_index = int((1 - validation_split)*max_samples)
        else:
            validation_index = max_samples

        for i in tqdm(range(0, max_samples, save_interval), desc='create_training_data'):
            logger.debug(f'creating dataset: i={i}, max_samples={max_samples}, step={save_interval}')

            en = english_tokens[i: i+save_interval]
            de = german_tokens[i: i+save_interval]

            for j in range(min(len(en), len(de))):
                row_sample_length = min(len(en[j]), len(de[j]), sample_length)
                en[j] = en[j][:row_sample_length]  # Truncating to same length as a first solution
                de[j] = de[j][:row_sample_length]  # Truncating to same length as a first solution

            assert len(en) == len(de), \
                f'unexpected data mismatch for english={len(en)}, german={len(de)}'

            dataset = (en, de)
            compress_pickle.dump(dataset,
                                 path=os.path.join(save_dir if i < validation_index else save_dir_validation,
                                                   f'train_{i}'),
                                 compression=save_compression)

    if not save_dataset:
        raise NotImplementedError('Down-prioritized due to too typically high memory requirements.')
        assert len(en) == len(de), \
            f'unexpected data mismatch for english={len(en)}, german={len(de)}'

        return en, de
Code example #16
File: corpus.py  Project: Reddit-NLP/reddit-nlp
    def compile(
        self,
        compile_params: Dict[str, Any],
        progress_cb: Optional[Callable[[int], None]] = None,
    ) -> None:
        if self.compiled:
            return

        client_id = compile_params["client_id"]
        client_secret = compile_params["client_secret"]

        reddit = praw.Reddit(
            client_id=client_id,
            client_secret=client_secret,
            user_agent=constants.reddit_user_agent,
            check_for_updates=False,
            comment_kind="t1",
            message_kind="t4",
            redditor_kind="t2",
            submission_kind="t3",
            subreddit_kind="t5",
            trophy_kind="t6",
            oauth_url="https://oauth.reddit.com",
            reddit_url="https://www.reddit.com",
            short_url="https://redd.it",
            ratelimit_seconds=5,
            timeout=16,
        )
        api = PushshiftAPI(reddit)
        comments = []

        start_epoch = int(self.start_time.timestamp())
        end_epoch = int(self.end_time.timestamp())

        progress = 0
        for subreddit in self.subreddits:
            # Collect all comments matching the search parameters in a list
            for comment in api.search_comments(after=start_epoch,
                                               before=end_epoch,
                                               subreddit=subreddit):
                comments.append(comment)
                progress += 1
                if progress_cb is not None:
                    progress_cb(progress)

        if not comments:
            raise ValueError(
                "No comments found. Double-check your search parameters and API credentials."
            )

        self.document_count = len(comments)

        with open(self.comments_pickle_path, "wb") as pickle_file:
            compress_pickle.dump(comments, pickle_file, compression="gzip")

        self.compiled = True
        self.write()
Code example #17
    def wrapped_method(self, *args, **kwargs):
        path = path_format.format(**{**vars(self), **kwargs})
        if os.path.exists(path):
            value = load(path)
            return value
        result = f(self, *args, **kwargs)
        os.makedirs(os.path.split(path)[0], exist_ok=True)
        dump(result, path)
        return result
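The snippet above is only the inner wrapper; below is a sketch of the enclosing decorator it presumably comes from. `path_format` and `f` are taken from the snippet; everything else, including the name `cached_to_disk`, is my assumption.

import functools
import os

from compress_pickle import dump, load


def cached_to_disk(path_format):
    # Hypothetical outer decorator: caches a method's result on disk at a
    # path built from the instance's attributes and the call's kwargs.
    def decorator(f):
        @functools.wraps(f)
        def wrapped_method(self, *args, **kwargs):
            path = path_format.format(**{**vars(self), **kwargs})
            if os.path.exists(path):
                return load(path)
            result = f(self, *args, **kwargs)
            os.makedirs(os.path.split(path)[0], exist_ok=True)
            dump(result, path)
            return result
        return wrapped_method
    return decorator


# Hypothetical use on a method, caching per-instance results:
# class Experiment:
#     @cached_to_disk('cache/{name}/{split}.gz')
#     def compute_features(self, split='train'):
#         ...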
Code example #18
File: utils.py  Project: modaresimr/UnifiedAR
def saveState(vars, file, name='data'):
    import compress_pickle

    if not (os.path.exists(f'save_data/{file}/')):
        os.makedirs(f'save_data/{file}/')
    pklfile = f'save_data/{file}/{name}.pkl'
    # with open(file+name+'.pkl', 'wb') as f:
    # pickle.dump(vars, f)
    compress_pickle.dump(vars, pklfile + '.lz4')
Code example #19
File: check.py  Project: zsyzgu/TouchType
    def stop(self):
        self.is_running = False
        file_path = 'data/' + self.file_name + '_checked.gz'
        DataManager().judgeFileExistance(file_path)
        compress_pickle.dump(self.frames, file_path)
        for frame in self.frames:
            frame.force_array = None
        pickle.dump(self.frames,
                    open('data/' + self.file_name + '.simple', 'wb'))
Code example #20
def preProcessSubLandscape(pop,
                           landReps,
                           fName,
                           drive,
                           nodesAggLst,
                           nodeAggIx,
                           MF=(True, True),
                           cmpr='bz2',
                           SUM=True,
                           AGG=True,
                           SPA=True,
                           REP=True,
                           SRP=True):
    """
    Preprocesses a subset of the landscape
    Args:
        pop (list): Files list element aggregated by landscape subset
        landReps (dict): Landscape repetitions
                (spatial from monet.loadAndAggregateLandscapeDataRepetitions)
        fName (str): Filename (including path)
        drive (dict): Gene-drive dictionary
        nodesAggLst (list): List of lists containing the indices of the nodes
                to be aggregated together
        nodeAggIx (int): Current list to process (from the nodeAggLst)
        MF (bool tuple): Male and Female boolean selectors
        cmpr (str): Compression algorithm to be used by compress-pickle
        SUM (bool): Population summed and gene-aggregated into one node
        AGG (bool): Population gene-aggregated in their own nodes
        SPA (bool): Genetic landscape (gene-aggregated)
        REP (bool): Garbage gene-aggregated data
        SRP (bool): Summed into one garbage gene-aggregated data
    Returns:
        None
    """
    if SUM:
        sumData = monet.sumLandscapePopulationsFromFiles(pop, MF[0], MF[1])
        sumAgg = monet.aggregateGenotypesInNode(sumData, drive)
        pkl.dump(sumAgg, fName + '_sum', compression=cmpr)
    if AGG:
        aggData = monet.loadAndAggregateLandscapeData(pop, drive, MF[0], MF[1])
        pkl.dump(aggData, fName + '_agg', compression=cmpr)
    if SPA:
        geneSpaTemp = monet.getGenotypeArraysFromLandscape(aggData)
        pkl.dump(geneSpaTemp, fName + '_spa', compression=cmpr)
    if REP or SRP:
        fLandReps = monet.filterAggregateGarbageByIndex(
            landReps, nodesAggLst[nodeAggIx])
        pkl.dump(fLandReps, fName + '_rep', compression=cmpr)
    if SRP:
        fRepsSum = [sum(i) for i in fLandReps['landscapes']]
        fRepsDict = {
            'genotypes': fLandReps['genotypes'],
            'landscapes': fRepsSum
        }
        pkl.dump(fRepsDict, fName + '_srp', compression=cmpr)
    return None
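For context, a hedged read-back sketch (my addition, not from the original code), assuming `pkl` is `compress_pickle` imported as `pkl` and `cmpr='bz2'`; with compress_pickle's default `set_default_extension=True`, the dumps above should land on disk with a `.bz` suffix, and loading uses the same arguments:

import compress_pickle as pkl

fName = 'out/landscape_subset'  # placeholder path, for illustration only
sumAgg = pkl.load(fName + '_sum', compression='bz2')
aggData = pkl.load(fName + '_agg', compression='bz2')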
Code example #21
def main():
    path_training_folders = "/home/pepamengual/UEPPi/ueppi_script/training/all_complexes/interactome_*"
    radius = 4
    number_of_processors = 27
    training_data = training_with_multiprocessing(radius, number_of_processors,
                                                  path_training_folders)
    dump(training_data,
         "single_contact_matrix",
         compression="lzma",
         set_default_extension=False)
Code example #22
    def dump_predicted_set(self, x_set, y_set):
        x_set_enc = self.deep_autoencoder.predict_encoded(x_set)
        dump = {
            "categories": self.classes_vector,
            "x": x_set,
            "x_encoded": x_set_enc,
            "y": y_set
        }
        now_string = self._get_now_string()
        dump_filename = "%s_encoded.gz" % now_string
        compress_pickle.dump(dump, dump_filename)
Code example #23
def test_dump_compresses(simple_dump_and_remove):
    path, compression, message, optimize = simple_dump_and_remove
    kwargs = dict()
    if compression == "zipfile":
        kwargs = dict(zipfile_compression=zipfile.ZIP_DEFLATED)
    dump(message, path, compression=compression, set_default_extension=False, **kwargs)
    with open(path, "rb") as f:
        compressed_message = f.read()
    if compression in (None, "pickle"):
        assert len(compressed_message) > len(message)
    else:
        assert len(compressed_message) < len(message)
Code example #24
File: record.py  Project: zsyzgu/TouchType
def record_board(is_end, file_name):
    board = Board()

    while is_end.qsize() == 0:
        frame = board.getFrame()
        frame.output()
        #print(board.getFrameTime())
    cv2.destroyAllWindows()
    
    board.stop()
    print('Board compressing.', time.perf_counter())
    compress_pickle.dump(board.frames, 'data/' + file_name + '.gz')
    print('Board released.', time.perf_counter())
Code example #25
    def save_pickle(self, filename):
        '''
        Saves current net model to a compressed file that can be reloaded or
        shared.
        '''
        pickle_tuple = (self.net_thickness, self.net_width, self.net_length,
                        self.slots_in, self.slots_out, self.angle_in,
                        self.angle_out, self.spi_in, self.tol, self.knots,
                        self.probe_pts, self.probe_samples)

        outfile = open(filename, 'wb')
        dump(pickle_tuple, outfile, compression='gzip')
        outfile.close()
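A hedged reload counterpart (my addition, not from the project): because save_pickle writes through an open file object with compression='gzip', reading the tuple back passes the compression explicitly as well.

from compress_pickle import load


def load_pickle(filename):
    # Hypothetical counterpart to save_pickle above: reload the tuple of
    # net parameters from the gzip-compressed file.
    with open(filename, 'rb') as infile:
        return load(infile, compression='gzip')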
Code example #26
    def save_to_file(dataset, filepath, force_overwrite=False, compression='lzma'):
        if not isinstance(dataset, NISTDB19Dataset):
            raise RuntimeError(f"Object {type(dataset)} is not inherit from NISTDB19Dataset")

        if not os.path.exists(os.path.dirname(filepath)):
            raise RuntimeError(f"Folder {os.path.dirname(filepath)} does not exist")

        if not force_overwrite and os.path.exists(filepath):
            print(
                f"\n[WARNING]: Can't save to '{filepath}'.\nFile already exist. Add 'force_overwrite=False' for overwrite")
            return
        with open(filepath, 'wb') as dataset_file:
            compress_pickle.dump(dataset, dataset_file, compression=compression)
Code example #27
def main():
    
    path_static_data = sys.argv[1] # Path of atlas is the first arg
    type_moving_data = sys.argv[2] # The type of img1 is the second arg    
    path_moving_data = sys.argv[3] # Path of img1 (moving data) is the third arg

    # Extract the basename of the moving image to use later for saving other images
    moving_data_basename = split(r"\.", basename(path_moving_data))[0]
    
    # Perform an initial linear registration using FLIRT and update the path
    # to the new transformed image
    path_moving_data, path_aff_mat = flirt(path_static_data, 
                                            path_moving_data, 
                                            type_moving_data,
                                            moving_data_basename)
    
    # Load the moving data and atlas
    moving_data, moving_affine = load_nifti(path_moving_data)    
    static_data, static_affine = load_nifti(path_static_data)  

    # Perform non-linear registration
    warped_moving, mapping = syn_registration(static_data, static_affine, 
                                                moving_data, moving_affine)

    # Saving the registration results
    path_warped_moving = pjoin(Path(path_moving_data).parent, 
                                moving_data_basename + "_nlinreg.nii.gz")

    nib.save(nib.Nifti1Image(warped_moving.astype(np.float32), static_affine), 
            path_warped_moving)

    # Save the optimized mapping object for future use
    dump(mapping, 
        pjoin(Path(path_moving_data).parent, moving_data_basename + "_map.gz"), 
        compression="gzip", 
        set_default_extension=True)
    
    # Apply the affine transformation and warp to all the other images
    for i in range (4, len(sys.argv)):
        img_basename = split(r"\.", basename(sys.argv[i]))[0]
        path_img_flrt = apply_mat(path_static_data, sys.argv[i], path_aff_mat, img_basename)

        img_data, img_affine = load_nifti(path_img_flrt)

        warped_img = apply_syn_registration(img_data, mapping)
        
        path_warped_img = pjoin(Path(sys.argv[i]).parent, 
                                    img_basename + "_nlinreg.nii.gz")

        nib.save(nib.Nifti1Image(warped_img.astype(np.float32), static_affine), 
                path_warped_img)
Code example #28
File: benchmark.py  Project: MLjungg/CTGAN_M-Thesis
def _run_job(args):
    # Reset random seed
    np.random.seed()

    synthesizer, metadata, metrics, iteration, cache_dir, timeout, run_id = args

    name = synthesizer['name']
    dataset_name = metadata._metadata['name']

    LOGGER.info(
        'Evaluating %s on %s dataset %s with timeout %ss; iteration %s; %s',
        name, metadata.modality, dataset_name, timeout, iteration,
        used_memory())

    if timeout:
        output = _score_with_timeout(timeout, synthesizer, metadata, metrics,
                                     iteration)
    else:
        output = _score(synthesizer, metadata, metrics, iteration)

    scores = output.get('scores')
    if not scores:
        scores = pd.DataFrame({'score': [None]})
    else:
        scores = pd.DataFrame(scores)

    scores.insert(0, 'synthesizer', name)
    scores.insert(1, 'dataset', metadata._metadata['name'])
    scores.insert(2, 'modality', metadata.modality)
    scores.insert(3, 'iteration', iteration)
    scores['model_time'] = output.get('model_time')
    scores['run_id'] = run_id

    if 'error' in output:
        scores['error'] = output['error']

    if cache_dir:
        base_path = str(cache_dir /
                        f'{name}_{dataset_name}_{iteration}_{run_id}')
        if scores is not None:
            scores.to_csv(base_path + '_scores.csv', index=False)
        if 'synthetic_data' in output:
            compress_pickle.dump(output['synthetic_data'],
                                 base_path + '.data.gz')
        if 'exception' in output:
            with open(base_path + '_error.txt', 'w') as error_file:
                error_file.write(output['exception'])

    return scores
Code example #29
def tune_custom_model_a_hyperparameters(
        episodes_folder: str,
        save_folder: str,
        potential_training_file_nbs: List[int],
        potential_validation_file_nbs: List[int],
        cpickled_trials_path: str = None):
    """cpickled_trials_path can be used to resume the tuning. By default it will be in the save_folder and have the
    file name Trials.xz (as we use lzma-compression with compress-pickle)."""
    if cpickled_trials_path is None:
        cpickled_trials_path = os.path.join(save_folder, "Trials.xz")

    if not os.path.exists(cpickled_trials_path):
        trials = Trials()
        current_nb_runs = 0
    else:
        with open(cpickled_trials_path, 'rb') as file:
            trials = compress_pickle.load(file, compression="lzma")
        current_nb_runs = len(trials.trials)

    best_hyperparameters = None
    while current_nb_runs < TUNING_NB_RUNS:
        best_hyperparameters = fmin(
            tune_model_a,
            space=(
                hp.loguniform('learning_rate', math.log(10**-5),
                              math.log(10**-3)),
                hp.loguniform('regularization_strength', math.log(10**-4),
                              math.log(10**-2)),
                hp.uniformint('nb_frames_to_stack', 2, 25),
                hp.choice('episodes_folder',
                          [episodes_folder]),  # not really a choice
                hp.choice('save_folder',
                          [save_folder]),  # just a way to pass more parameters
                hp.choice('potential_training_file_nbs',
                          [potential_training_file_nbs]),
                hp.choice('potential_validation_file_nbs',
                          [potential_validation_file_nbs])),
            algo=tpe.suggest,
            max_evals=current_nb_runs +
            1,  # just keep going (Note: messes with the progress bar)
            trials=trials)
        current_nb_runs += 1  # (after the += 1: == len(trials.trials))

        # Save after every tuning run
        with open(cpickled_trials_path, "wb") as file:
            compress_pickle.dump(trials, file, compression="lzma")

    print(best_hyperparameters)
    print(trials.best_trial["result"]["loss"])
Code example #30
def cache_write(object, file_name, only_on_professors_computer=False):
    if only_on_professors_computer and not is_this_my_computer():
        """ Probably for your own good :-). """
        return
    # file_name = cn_(file_name) if cache_prefix else file_name
    dn = os.path.dirname(file_name)
    if not os.path.exists(dn):
        os.mkdir(dn)
    print("Writing cache...", file_name)
    with open(
            file_name,
            'wb',
    ) as f:
        compress_pickle.dump(object, f, compression="lzma")
    print("Done!")