Example #1
def test_pickle_vocab_vectors(en_vocab):
    vectors_length = en_vocab.vectors_length
    assert vectors_length != 0

    apples = en_vocab['apples']
    oranges = en_vocab['oranges']
    hippos = en_vocab['hippos']
    
    assert apples.similarity(oranges) > apples.similarity(hippos)

    apples.vector = hippos.vector

    assert apples.similarity(oranges) < apples.similarity(hippos)

    file_ = io.BytesIO()
    cloudpickle.dump(en_vocab, file_)

    file_.seek(0)

    loaded = pickle.load(file_)

    apples = loaded['apples']
    oranges = loaded['oranges']
    hippos = loaded['hippos']

    assert apples.similarity(oranges) < apples.similarity(hippos)
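
This test dumps with cloudpickle but reads the result back with the plain pickle module, as do several examples below; that works because cloudpickle emits an ordinary pickle stream. A minimal, self-contained round trip illustrating the point (not part of the test suite above):

import io
import pickle
import cloudpickle

buf = io.BytesIO()
cloudpickle.dump({"answer": 42}, buf)      # cloudpickle writes a standard pickle stream
buf.seek(0)
assert pickle.load(buf) == {"answer": 42}  # so the stock pickle module can read it back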
Example #2
    def _calc_T_inv(self, name, x, lambdify=True):
        """ Return the inverse transform matrix, which converts from
        world coordinates into the robot's end-effector reference frame

        name string: name of the joint or link, or end-effector
        x list: the [x,y,z] position of interest in "name"'s reference frame
        lambdify boolean: if True returns a function to calculate
                          the transform. If False returns the Sympy
                          matrix
        """

        # check to see if we have our transformation saved in file
        if os.path.isfile('%s/%s.T_inv' % (self.config_folder, name)):
            T_inv = cloudpickle.load(open('%s/%s.T_inv' %
                                          (self.config_folder, name), 'rb'))
        else:
            T = self._calc_T(name=name)
            rotation_inv = T[:3, :3].T
            translation_inv = -rotation_inv * T[:3, 3]
            T_inv = rotation_inv.row_join(translation_inv).col_join(
                sp.Matrix([[0, 0, 0, 1]]))

            # save to file
            cloudpickle.dump(T_inv, open('%s/%s.T_inv' %
                                         (self.config_folder, name), 'wb'))

        if lambdify is False:
            return T_inv
        return sp.lambdify(self.q + self.x, T_inv)
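Example #3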
    def test_module_locals_behavior(self):
        # Makes sure that a local function defined in another module is
        # correctly serialized. This notably checks that the globals are
        # accessible and that there is no issue with the builtins (see #211)

        pickled_func_path = os.path.join(self.tmpdir, 'local_func_g.pkl')

        child_process_script = '''
        import pickle
        import gc
        with open("{pickled_func_path}", 'rb') as f:
            func = pickle.load(f)

        assert func(range(10)) == 45
        '''

        child_process_script = child_process_script.format(
                pickled_func_path=_escape(pickled_func_path))

        try:

            from .testutils import make_local_function

            g = make_local_function()
            with open(pickled_func_path, 'wb') as f:
                cloudpickle.dump(g, f, protocol=self.protocol)

            assert_run_python_script(textwrap.dedent(child_process_script))

        finally:
            os.unlink(pickled_func_path)
Example #4
def test_pickle_english(EN):
    file_ = io.BytesIO()
    cloudpickle.dump(EN, file_)

    file_.seek(0)

    loaded = pickle.load(file_)
Example #5
def test_pickle_vocab(en_vocab):
    file_ = io.BytesIO()
    cloudpickle.dump(en_vocab, file_)

    file_.seek(0)

    loaded = pickle.load(file_)
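Example #6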
 def test_load_namespace(self):
     obj = 1, 2, 3, 4
     bio = BytesIO()
     cloudpickle.dump(obj, bio)
     bio.seek(0)
     returned_obj = cloudpickle.load(bio)
     self.assertEqual(obj, returned_obj)
Example #7
 def save(self, directory: str):
     shutil.copyfile(self.model_file, os.path.join(directory, 'dan.pt'))
     shell(f'rm -f {self.model_file}')
     with open(os.path.join(directory, 'dan.pkl'), 'wb') as f:
         cloudpickle.dump({
             'page_field': self.page_field,
             'combined_text_field': self.text_field,
             'unigram_text_field': self.unigram_field,
             'bigram_text_field': self.bigram_field,
             'trigram_text_field': self.trigram_field,
             'combined_ngrams': self.combined_ngrams,
             'unigrams': self.unigrams,
             'bigrams': self.bigrams,
             'trigrams': self.trigrams,
             'combined_max_vocab_size': self.combined_max_vocab_size,
             'unigram_max_vocab_size': self.unigram_max_vocab_size,
             'bigram_max_vocab_size': self.bigram_max_vocab_size,
             'trigram_max_vocab_size': self.trigram_max_vocab_size,
             'qanta_id_field': self.qanta_id_field,
             'n_classes': self.n_classes,
             'gradient_clip': self.gradient_clip,
             'n_hidden_units': self.n_hidden_units,
             'n_hidden_layers': self.n_hidden_layers,
             'nn_dropout': self.nn_dropout,
             'batch_size': self.batch_size,
             'use_wiki': self.use_wiki,
             'n_wiki_sentences': self.n_wiki_sentences,
             'wiki_title_replace_token': self.wiki_title_replace_token,
             'lowercase': self.lowercase,
             'pooling': self.pooling,
             'random_seed': self.random_seed,
             'config_num': self.config_num
         }, f)
Example #8
    def _calc_Tx(self, name, x, lambdify=True):
        """ Uses Sympy to transform x from the reference frame of a joint
        or link to the origin (world) coordinates.

        name string: name of the joint or link, or end-effector
        x list: the [x,y,z] position of interest in "name"'s reference frame
        lambdify boolean: if True returns a function to calculate
                          the transform. If False returns the Sympy
                          matrix
        """

        # check to see if we have our transformation saved in file
        if (os.path.isfile('%s/%s.T' % (self.config_folder, name))):
            Tx = cloudpickle.load(open('%s/%s.T' %
                                       (self.config_folder, name), 'rb'))
        else:
            T = self._calc_T(name=name)
            # transform x into world coordinates
            Tx = T * sp.Matrix(self.x + [1])

            # save to file
            cloudpickle.dump(Tx, open('%s/%s.T' %
                                      (self.config_folder, name), 'wb'))

        if lambdify is False:
            return Tx
        return sp.lambdify(self.q + self.x, Tx)
Example #9
    def _calc_Mq_g(self, lambdify=True):
        """ Uses Sympy to generate the force of gravity in
        joint space for the ur5

        lambdify boolean: if True returns a function to calculate
                          the Jacobian. If False returns the Sympy
                          matrix
        """

        # check to see if we have our gravity term saved in file
        if os.path.isfile('%s/Mq_g' % self.config_folder):
            Mq_g = cloudpickle.load(open('%s/Mq_g' % self.config_folder,
                                         'rb'))
        else:
            # get the Jacobians for each link's COM
            J = [self._calc_J('link%s' % ii, x=[0, 0, 0], lambdify=False)
                 for ii in range(self.num_links)]

            # transform each inertia matrix into joint space and
            # sum together the effects of arm segments' inertia on each motor
            Mq_g = sp.zeros(self.num_joints, 1)
            for ii in range(self.num_joints):
                Mq_g += J[ii].T * self._M[ii] * self.gravity
            Mq_g = sp.Matrix(Mq_g)

            # save to file
            cloudpickle.dump(Mq_g, open('%s/Mq_g' % self.config_folder,
                                        'wb'))

        if lambdify is False:
            return Mq_g
        return sp.lambdify(self.q + self.x, Mq_g)
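
Examples #2, #8, #9 and #26 all follow the same cache-on-first-use pattern: build a Sympy expression once, persist it with cloudpickle, and reload it on later runs. A stripped-down sketch of that pattern, with hypothetical names (config_folder, build_expression) and context-managed file handles:

import os
import cloudpickle

def load_or_build(config_folder, name, build_expression):
    """Return the cached expression for `name`, building and pickling it on first use."""
    cache_file = os.path.join(config_folder, name)
    if os.path.isfile(cache_file):
        with open(cache_file, 'rb') as f:
            return cloudpickle.load(f)
    expression = build_expression()  # e.g. a Sympy matrix derived from the robot's configuration
    os.makedirs(config_folder, exist_ok=True)
    with open(cache_file, 'wb') as f:
        cloudpickle.dump(expression, f)
    return expression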
Example #10
def test_pickle(EN):
    file_ = io.BytesIO()
    cloudpickle.dump(EN.parser, file_)

    file_.seek(0)

    loaded = pickle.load(file_)
Example #11
def pycloud_pickle(file_name, obj):
    # type: (Text, Any) -> None
    """Pickle an object to a file using cloudpickle."""
    import cloudpickle

    with io.open(file_name, 'wb') as f:
        cloudpickle.dump(obj, f)
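
The matching loader is not shown on this page; a minimal counterpart, assuming the file was written by pycloud_pickle above (the name pycloud_unpickle is illustrative):

import io
import cloudpickle

def pycloud_unpickle(file_name):
    # type: (Text) -> Any
    """Load an object from a file written with cloudpickle."""
    with io.open(file_name, 'rb') as f:
        return cloudpickle.load(f)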
Example #12
 def _save_cache_to_file(self, cache, file_name):
     self._logger.debug(
         'Saving cache with %d entries to %s' % (len(cache), file_name))
     deterministic_cache = DeterministicCache(
         cache, self._cache_valid_for_turns)
     with open(file_name, 'wb') as io:
         pickle.dump(deterministic_cache, io)
     return True
Example #13
def send_result(match, result):
	""" Send a match result to the webserver """
	filename = config.temp_dir + match.uuid.hex + "-match-result.txt"
	f = open(filename, 'wb')
	cloudpickle.dump(result, f)
	f.close()
	send_file_webserver_ready(filename, config.webserver_results_path)
	subprocess.call(["rm", filename])
def get_dataset(path_to_data='data/tokenized/'):
    # our input data is going to be .txt files in a folder that are formatted as follows:
    # each line is a new token (word) separated from a class label with a tab character.
    # our preprocessing includes converting to lowercase, splitting into characters, and repeating
    # the label for each character. Because punctuation counts as a word, we apply special
    # rules that add spaces around punctuation tokens to build a more accurate language model
    class StringProcessor:
        """
        This is a helper class (normally we would just use functions for preprocessing) to preprocess
        our text files (line by line) into the appropriate input and target data. The class is used
        because we needed to keep track of state when processing line by line.
        """
        def __init__(self):
            self.previous_label = ''
            self.space_before_punct = ['(', '``', '[', '{', '$', '#', '&']
            self.space_after_punct = ['&']
            self.previous_char = ''

        def process_line(self, line):
            chars, label = line.split('\t', 1)
            chars = chars.lower()
            label = label.rstrip()
            labels = [label] * len(chars)
            if (not chars[0] in string.punctuation or chars[0] in self.space_before_punct) and \
                    (not self.previous_char in self.space_before_punct or self.previous_char in self.space_after_punct):
                chars = ' ' + chars
                if label == self.previous_label:
                    labels = [label] + labels
                else:
                    labels = ['O'] + labels

            self.previous_label = label
            self.previous_char = chars[-1]

            return chars, labels

        def get_inputs(self, line):
            return self.process_line(line)[0]

        def get_labels(self, line):
            return self.process_line(line)[1]

    # now that we defined our preprocessor, create a new TextDataset (works over files)
    # a TextDataset is an OpenDeep class that creates one-hot encodings of inputs and outputs automatically
    # and keeps them in vocab and entity_vocab dictionaries.
    processor = StringProcessor()
    dataset = TextDataset(path=path_to_data,
                          inputs_preprocess=lambda line: processor.get_inputs(line),
                          targets_preprocess=lambda line: processor.get_labels(line),
                          level="char", sequence_length=120)

    # save the computed dictionaries to use for converting inputs and outputs from running the model.
    with open('vocab.pkl', 'wb') as f:
        pickle.dump(dataset.vocab, f, protocol=pickle.HIGHEST_PROTOCOL)
    with open('entity_vocab.pkl', 'wb') as f:
        pickle.dump(dataset.label_vocab, f, protocol=pickle.HIGHEST_PROTOCOL)

    return dataset
Example #15
 def _write_partial_result(self, results, counter):
     filename = "pyqit-{}-{}-{}".format(
             int(time.mktime(self.time.timetuple())),
             self.id,
             counter)
     with open(filename, "w") as f:
         cloudpickle.dump(results, f)
     logging.info("Qit: Writing file {} ({} results)".format(
         filename, len(results)))
Example #16
 def save(self, directory: str) -> None:
     shutil.copyfile(self.model_file, os.path.join(directory, 'elmo.pt'))
     shell(f'rm -f {self.model_file}')
     with open(os.path.join(directory, 'elmo.pkl'), 'wb') as f:
         cloudpickle.dump({
             'class_to_i': self.class_to_i,
             'i_to_class': self.i_to_class,
             'config_num': self.config_num,
             'random_seed': self.random_seed,
             'dropout': self.dropout
         }, f)
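Example #17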
    def train(self, X, y, outpath=None, verbose=True):
        """
        Train intent classifier for given training data
        :param X:
        :param y:
        :param outpath:
        :param verbose:
        :return:
        """

        def build(X, y=None):
            """
            Inner build function that builds a single model.
            :param X:
            :param y:
            :return:
            """
            model = Pipeline([
                ('vectorizer', TfidfVectorizer(
                    tokenizer=self.spacy_tokenizer, preprocessor=None, lowercase=False)),
                ('clf', SVC(C=1, kernel="linear",
                            probability=True,
                            class_weight='balanced'))])

            from sklearn.model_selection import GridSearchCV

            items, counts = np.unique(y, return_counts=True)

            cv_splits = max(2, min(5, np.min(counts) // 5))

            Cs = [0.01, 0.25, 1, 2, 5, 10, 20, 100]
            param_grid = {'clf__C': Cs, 'clf__kernel': ["linear"]}
            grid_search = GridSearchCV(model,
                                       param_grid=param_grid,
                                       scoring='f1_weighted',
                                       cv=cv_splits,
                                       verbose=2,
                                       n_jobs=-1
                                       )
            grid_search.fit(X, y)

            return grid_search

        model = build(X, y)


        if outpath:
            with open(outpath, 'wb') as f:
                cloudpickle.dump(model, f)

                if verbose:
                    print("Model written out to {}".format(outpath))

        return model
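Example #18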
    def persist(self, model_dir):
        # type: (Text) -> Dict[Text, Any]
        """Persist this model into the passed directory. Returns the metadata necessary to load the model again."""
        import cloudpickle

        classifier_file = os.path.join(model_dir, "ngram_featurizer.pkl")
        with io.open(classifier_file, 'wb') as f:
            cloudpickle.dump(self, f)

        return {
            "ngram_featurizer": "ngram_featurizer.pkl"
        }
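Example #19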
def hpolib_wrapper(objfun, search, budget, result_on_terminate=0.0):

    # construct the typemap (dict mapping argument names to their types)
    # the typemap is used to reverse type erasure by serialization
    # we use the search space definition to acquire types
    hp2type = {'float': float,
               'switch': str
               }
    typemap = {k: hp2type[v.name] for k, v in search.items()}

    with open('/tmp/data.pkl', 'wb') as f:
        pickle.dump({"objfun": objfun, "typemap": typemap, "search": search}, f)
 def wrapper(args):
     kwargs = {k: v for k, v in zip(search.keys(), args)}
     result = fun(**kwargs)
     try:
         with open('/tmp/results.pkl', 'rb') as f: data = pickle.load(f)
     except (IOError, EOFError):
         data = {"kwargs": [], "results": []}
     with open('/tmp/results.pkl', 'wb') as f:
         data["kwargs"].append(kwargs)
         data["results"].append(result)
         pickle.dump(data, f)
     return result
Example #21
    def _save_local(self, path):
        '''Save current query object to local path
        '''
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

        with open(os.path.join(path, "pickle_archive"), "wb") as f:
            _cloudpickle.dump(self, f)
Example #22
	def send_compile_result(self, path, subm_id, game_id, submission):
		""" send results of compilation back to webserver """
		runfile = path + "run_command"
		if os.path.exists(runfile):	# make sure they don't put malicious commands in here, by deleting the file if it exists
			subprocess.call(["chmod", "u+rw", runfile])
			subprocess.call("rm", "-f", runfile)
		with open(runfile, 'wb') as fo:
			cloudpickle.dump(submission, fo)
		subprocess.call(["chmod", "u+r", path + "*"])
		zipfile = path + subm_id + "-compiled.zip"
		subprocess.call(["zip", "-r", zipfile, path, "-i", path + "*"])
		comms.send_file_datastore_ready(zipfile, config.datastore_submission_path)
		reportfile = path + subm_id + "-report.txt" 
		self.save_report(submission, reportfile)
		comms.send_file_webserver_ready(reportfile, config.webserver_results_path)
		if (submission.is_ready()):
			self.send_matchmaker_compile_info(path, submission.username, game_id, subm_id)
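Example #23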
def get_data(name, force=False, read=True):
    """ remember that the stuff is here
        d._rightmove_data__request_object = _GetDataFromUrl()
        d._rightmove_data__url

        And that weblinks go into df.url. That is what we need to recurse into.
    """
    url = urls[name]
    filename = get_hash_pickle_name(name, url)
    if os.path.exists(filename) and not force:
        print("found {}".format(filename))
    else:
        rightmove_object = rightmove_data(url)
        pickle.dump(rightmove_object, open(filename, 'wb'))
    if read:
        print("reading {}".format(filename))
        return pickle.load(open(filename, 'rb'))
Example #24
    def save(self, path=None):
        """Save model to a pickle located at `path`"""
        if path is None:
            path = os.path.join(logger.get_dir(), "model.pkl")

        with tempfile.TemporaryDirectory() as td:
            save_state(os.path.join(td, "model"))
            arc_name = os.path.join(td, "packed.zip")
            with zipfile.ZipFile(arc_name, 'w') as zipf:
                for root, dirs, files in os.walk(td):
                    for fname in files:
                        file_path = os.path.join(root, fname)
                        if file_path != arc_name:
                            zipf.write(file_path, os.path.relpath(file_path, td))
            with open(arc_name, "rb") as f:
                model_data = f.read()
        with open(path, "wb") as f:
            cloudpickle.dump((model_data, self._act_params), f)
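
The pickle written above bundles the zipped contents of the temporary save directory together with self._act_params. A hypothetical counterpart that unpacks it (the name load_packed_model is made up; this is not the library's own loader):

import os
import tempfile
import zipfile
import cloudpickle

def load_packed_model(path):
    # Read back the (zip bytes, act_params) tuple written by save() above
    with open(path, "rb") as f:
        model_data, act_params = cloudpickle.load(f)
    td = tempfile.mkdtemp()
    arc_name = os.path.join(td, "packed.zip")
    with open(arc_name, "wb") as f:
        f.write(model_data)
    with zipfile.ZipFile(arc_name, "r") as zipf:
        zipf.extractall(td)   # restore the saved state files into td
    return td, act_params
Example #25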
    def persist(cls, model_dir, **args):
        """
        save pickle model
        Args:
            model_dir: model dir
            model_name: model name

        Returns: saved json

        """
        # type: (Text) -> Dict[Text, Any]
        """Persist this model into the passed directory. Returns the metadata necessary to load the model again."""
        import cloudpickle
        model_name = str(args["model_version"]) + "_" + cls.name + ".pickle"
        classifier_file = os.path.join(model_dir, model_name)
        with io.open(classifier_file, 'wb') as f:
            cloudpickle.dump(cls, f)
        return {
            "model_name"+"_"+cls.name: model_name
        }
Example #26
    def _calc_J(self, name, x, lambdify=True):
        """ Uses Sympy to generate the Jacobian for a joint or link

        name string: name of the joint or link, or end-effector
        lambdify boolean: if True returns a function to calculate
                          the Jacobian. If False returns the Sympy
                          matrix
        """

        # check to see if we have our Jacobian saved in file
        if os.path.isfile('%s/%s.J' % (self.config_folder, name)):
            J = cloudpickle.load(open('%s/%s.J' %
                                 (self.config_folder, name), 'rb'))
        else:
            Tx = self._calc_Tx(name, x=x, lambdify=False)
            J = []
            # calculate derivative of (x,y,z) wrt to each joint
            for ii in range(self.num_joints):
                J.append([])
                J[ii].append(Tx[0].diff(self.q[ii]))  # dx/dq[ii]
                J[ii].append(Tx[1].diff(self.q[ii]))  # dy/dq[ii]
                J[ii].append(Tx[2].diff(self.q[ii]))  # dz/dq[ii]

            end_point = name.strip('link').strip('joint')
            if end_point != 'EE':
                end_point = min(int(end_point) + 1, self.num_joints)
                # add on the orientation information up to the last joint
                for ii in range(end_point):
                    J[ii] = J[ii] + self.J_orientation[ii]
                # fill in the rest of the joints orientation info with 0
                for ii in range(end_point, self.num_joints):
                    J[ii] = J[ii] + [0, 0, 0]

            # save to file
            cloudpickle.dump(J, open('%s/%s.J' %
                                     (self.config_folder, name), 'wb'))

        J = sp.Matrix(J).T  # correct the orientation of J
        if lambdify is False:
            return J
        return sp.lambdify(self.q + self.x, J)
Example #27
    def save(self, path):
        """Save this workflow to disk

        Parameters
        ----------
        path: str
            The path to save the workflow to
        """
        # avoid a circular import getting the version
        from nvtabular import __version__ as nvt_version

        fs = fsspec.get_fs_token_paths(path)[0]

        fs.makedirs(path, exist_ok=True)

        # point all stat ops to store intermediate output (parquet etc) at the path
        # this lets us easily bundle
        for stat in _get_stat_ops([self.output_node]):
            stat.op.set_storage_path(path, copy=True)

        # generate a file of all versions used to generate this bundle
        lib = cudf if cudf else pd
        with fs.open(fs.sep.join([path, "metadata.json"]), "w") as o:
            json.dump(
                {
                    "versions": {
                        "nvtabular": nvt_version,
                        lib.__name__: lib.__version__,
                        "python": sys.version,
                    },
                    "generated_timestamp": int(time.time()),
                },
                o,
            )

        # dump out the full workflow (graph/stats/operators etc) using cloudpickle
        with fs.open(fs.sep.join([path, "workflow.pkl"]), "wb") as o:
            cloudpickle.dump(self, o)
Example #28
    def save(
        self,
        path: tp.Union[str, pathlib.Path],
    ) -> None:
        """
        Saves the model to disk.

        It creates a directory that includes:

        - `{path}/model.pkl`: The `Model` object instance serialized with `pickle`,
            this allows you to re-instantiate the model later.

        This allows you to save the entirety of the state of a model
        in a directory structure which can be fully restored via
        `Model.load` if the model is already instantiated, or via
        `elegy.model.load` to load the model instance from its pickled version.

        ```python
        import elegy

        model.save('my_model')  # creates folder at 'my_model'
        del model  # deletes the existing model

        # returns a model identical to the previous one
        model = elegy.model.load('my_model')
        ```
        Arguments:
            path: path where model structure will be saved.
        """
        model = self.local()

        if isinstance(path, str):
            path = pathlib.Path(path)

        path.mkdir(parents=True, exist_ok=True)

        with open(path / "model.pkl", "wb") as f:
            cloudpickle.dump(model, f)
Example #29
    def save_itr_params(self, itr, params):
        """Save the parameters if at the right iteration.

        Args:
            itr (int): Number of iterations. Used as the index of snapshot.
            params (obj): Content of snapshot to be saved.

        Raises:
            ValueError: If snapshot_mode is not one of "all", "last", "gap", "gap_and_last" or "none".

        """
        file_name = None

        if self._snapshot_mode == 'all':
            file_name = os.path.join(self._snapshot_dir, 'itr_%d.pkl' % itr)
        elif self._snapshot_mode == 'last':
            # override previous params
            file_name = os.path.join(self._snapshot_dir, 'params.pkl')
        elif self._snapshot_mode == 'gap':
            if itr % self._snapshot_gap == 0:
                file_name = os.path.join(self._snapshot_dir,
                                         'itr_%d.pkl' % itr)
        elif self._snapshot_mode == 'gap_and_last':
            if itr % self._snapshot_gap == 0:
                file_name = os.path.join(self._snapshot_dir,
                                         'itr_%d.pkl' % itr)
            file_name_last = os.path.join(self._snapshot_dir, 'params.pkl')
            with open(file_name_last, 'wb') as file:
                cloudpickle.dump(params, file)
        elif self._snapshot_mode == 'none':
            pass
        else:
            raise ValueError('Invalid snapshot mode {}'.format(
                self._snapshot_mode))

        if file_name:
            with open(file_name, 'wb') as file:
                cloudpickle.dump(params, file)
Example #30
def save(task_path: Path, result=None, task=None):
    """
    Save a :class:`~pydra.engine.core.TaskBase` object and/or results.

    Parameters
    ----------
    task_path : :obj:`Path`
        Write directory
    result : :obj:`Result`
        Result to pickle and write
    task : :class:`~pydra.engine.core.TaskBase`
        Task to pickle and write

    """
    if task is None and result is None:
        raise ValueError("Nothing to be saved")
    task_path.mkdir(parents=True, exist_ok=True)
    if result:
        with (task_path / "_result.pklz").open("wb") as fp:
            cp.dump(result, fp)
    if task:
        with (task_path / "_task.pklz").open("wb") as fp:
            cp.dump(task, fp)
Example #31
    def save(cls, obj: Any, filename: str, **kwargs: Dict[str, Any]) -> None:
        """
    
        @param filename: str
        @param obj: Pickled
        @param kwargs: Dict[str, Any]
        @return: None
        """
        print(f'PWD: {Path.cwd()}, Database root: {cls.root}'
              ) if cls.debug else None
        cls.root.mkdir(parents=True, exist_ok=True)
        filename = Path(f'{filename}.pickle')

        try:
            pobj = Pickled(obj=obj)
            pobj._save_data_hook(**kwargs)
            with open(cls.root / filename, 'wb') as f:
                pickle.dump(pobj, f)
        except Exception as e:
            print('Data type does not match the database')
            print('isinstance: ', isinstance(obj, SEq))
            print('type: ', type(obj))
            print(e)
Example #32
    def save(self, fpath: str = None) -> str:
        """
        Saves the Flow to a file by serializing it with cloudpickle.  This method is
        recommended if you wish to separate out the building of your Flow from its deployment.

        Args:
            - fpath (str, optional): the filepath where your Flow will be saved; defaults to
                `~/.prefect/flows/FLOW-NAME.prefect`

        Returns:
            - str: the full location the Flow was saved to
        """
        if fpath is None:
            path = "{home}/flows".format(home=prefect.context.config.home_dir)
            fpath = Path(os.path.expanduser(
                path)) / "{}.prefect".format(  # type: ignore
                    slugify(self.name))
            assert fpath is not None  # mypy assert
            fpath.parent.mkdir(exist_ok=True, parents=True)
        with open(str(fpath), "wb") as f:
            cloudpickle.dump(self, f)

        return str(fpath)
Example #33
def save_game_state_on_exit(orchestrator_object: IGameOrchestrator) -> None:
    """
    This method is called whenever the game crashes or is stopped. It saves all the information
    of the played game by persisting every currently instantiated object to a file using the pickle library.

    :param IGameOrchestrator orchestrator_object: The orchestrator, that controls all the game.
    :rtype: None.
    """
    now = datetime.now()
    current_time = now.strftime("%m-%d-%Y-%H:%M:%S")
    main_player = orchestrator_object.game.players[0]
    save_file_name = '{date}-saved-game-{name}-{job}.pckl'.format(
        date=current_time,
        name=main_player.name,
        job=main_player.job.get_name(),
    )

    save_file_path = '{working_directory}/saved_games/{file}'.format(
        working_directory=str(get_project_root()), file=save_file_name)

    f = open(save_file_path, 'wb')
    cloudpickle.dump(orchestrator_object, f)
    f.close()
Example #34
    def _save(self, data: pd.DataFrame) -> None:
        save_path = self._get_save_path()
        print(save_path)
        # mlflow.sklearn.save_model(sk_model=data,
        #                          path=save_path,
        #                          conda_env = self.conda_env,
        #                          **self._save_args)

        if not os.path.exists(".tmp"):
            os.makedirs(".tmp")

        model_path = os.path.join(".tmp", "model.pkl")

        with open(model_path, "wb+") as f:
            cloudpickle.dump(data, f)

        mlflow.pyfunc.save_model(
            save_path,
            python_model=ScikitWrapper(),
            artifacts={"model_path": model_path},
            code_path=["src/stitch_classify"],
            conda_env="src/environment.yml",
        )
Example #35
def save_model(model: SerializableModel, path: str, no_zip=False):
    """
    Save model as a ZIP or a directory at path. Path may or may not contain the .zip extension
    Args:
        model (SerializableModel): Model to save
        path (str): Path to save the model at. May or may not have the .zip extension
        no_zip (bool): Do not create a zip
    """

    if not (hasattr(model, "serialize") and hasattr(model, "deserialize")):
        raise NotImplementedError(f"Serialize/Deserialize not implemented for model of class {model.__class__}")

    work_dir = None
    try:
        #  First create a temporary directory to put all contents in
        work_dir = tempfile.mkdtemp()

        with open(f"{work_dir}/serialized.pkl", "wb") as f:
            cloudpickle.dump(model, f)

        #  Now let the model save whatever it wants
        model.serialize(work_dir)

        if no_zip:
            shutil.rmtree(path, ignore_errors=True)
            shutil.copytree(work_dir, path)
            return

        #  Package it up into a zip and clean up the directory
        if path.endswith(".zip"):
            path = path[:-len(".zip")]

        shutil.make_archive(path, "zip", work_dir)

    finally:
        if work_dir is not None:
            shutil.rmtree(work_dir)
Example #36
def find_out_feasible_states(env,
                             log_dir,
                             distance_threshold=0.1,
                             brownian_variance=1,
                             animate=False):
    no_new_states = 0
    with env.set_kill_outside():
        load_dir = 'data_upload/state_collections/'
        old_all_feasible_starts = pickle.load(
            open(osp.join(load_dir, 'all_feasible_states.pkl'), 'rb'))
        out_feasible_starts = StateCollection(
            distance_threshold=distance_threshold)
        print('number of feasible starts: ', old_all_feasible_starts.size)
        for start in old_all_feasible_starts.state_list:
            obs = env.reset(init_state=start)
            if obs[16] > -0.5:
                # print("got one more up to ", out_feasible_starts.size)
                out_feasible_starts.append([start])
        print("number of out feasible starts:", out_feasible_starts.size)
        while no_new_states < 5:
            total_num_starts = out_feasible_starts.size
            starts = out_feasible_starts.sample(100)
            new_starts = generate_starts(env,
                                         starts=starts,
                                         horizon=1000,
                                         size=100000,
                                         variance=brownian_variance,
                                         animated=animate,
                                         speedup=10)
            out_feasible_starts.append(new_starts)
            num_new_starts = out_feasible_starts.size - total_num_starts
            logger.log("number of new states: " + str(num_new_starts))
            if num_new_starts < 10:
                no_new_states += 1
            with open(osp.join(log_dir, 'all_out_feasible_states.pkl'),
                      'wb') as f:
                cloudpickle.dump(out_feasible_starts, f, protocol=3)
Example #37
    def persist(self, path, persistor=None, create_unique_subfolder=True):
        entity_extractor_file, entity_extractor_config_file = None, None
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

        if create_unique_subfolder:
            dir_name = os.path.join(path, "model_" + timestamp)
            os.mkdir(dir_name)
        else:
            dir_name = path

        data_file = os.path.join(dir_name, "training_data.json")
        classifier_file, ner_dir = None, None
        if self.intent_classifier:
            classifier_file = os.path.join(dir_name, "intent_classifier.pkl")
        if self.entity_extractor:
            ner_dir = os.path.join(dir_name, 'ner')
            if not os.path.exists(ner_dir):
                os.mkdir(ner_dir)
            entity_extractor_config_file = os.path.join(ner_dir, "config.json")
            entity_extractor_file = os.path.join(ner_dir, "model")

        write_training_metadata(dir_name, timestamp, data_file, self.name,
                                self.language_name, classifier_file, ner_dir)

        with open(data_file, 'w') as f:
            f.write(self.training_data.as_json(indent=2))
        if self.intent_classifier:
            with open(classifier_file, 'wb') as f:
                cloudpickle.dump(self.intent_classifier, f)
        if self.entity_extractor:
            with open(entity_extractor_config_file, 'w') as f:
                json.dump(self.entity_extractor.ner.cfg, f)

            self.entity_extractor.ner.model.dump(entity_extractor_file)

        if persistor is not None:
            persistor.send_tar_to_s3(dir_name)
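Example #38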
def create_and_save_data_preparation(data_preparation_function: Callable,
                                     path: str) -> None:
    """Create, serialize and save a DataPreparation instance.

    Parameters
    ----------
    data_preparation_function : Callable
        A function to use as data preparation. You can use your own custom code for
        data preparation, but it must be wrapped in a single function.

        NOTE: If the data preparation includes any kind of fitting on the training dataset
        (e.g. Scikit Learn transformers), it must be performed outside the final data
        preparation function to save. Fit the transformer(s) outside the function and put
        only the transform method inside it. Furthermore, if the entire data preparation
        is performed with a single Scikit-Learn transformer, you can directly pass it
        (fitted) to this method.
    path : str
        Local path to save the data preparation to.

    Raises
    ------
    TypeError
        If data_preparation_function is not a function (Callable type)
    ClearboxWrapperException
        If data preparation path already exists.
    """
    if not isinstance(data_preparation_function, Callable):
        raise TypeError(
            "data_preparation_function should be a Callable, got '{}'".format(
                type(data_preparation_function)))
    if os.path.exists(path):
        raise ClearboxWrapperException(
            "Data preparation path '{}' already exists".format(path))

    data_preparation = DataPreparation(data_preparation_function)
    with open(path, "wb") as data_preparation_serialized_file:
        cloudpickle.dump(data_preparation, data_preparation_serialized_file)
Example #39
    def save(cfg, filename: str):
        """
        Args:
            cfg: an omegaconf config object
            filename: yaml file name to save the config file
        """
        logger = logging.getLogger(__name__)
        try:
            cfg = deepcopy(cfg)
        except Exception:
            pass
        else:
            # if it's deep-copyable, then...
            def _replace_type_by_name(x):
                if "_target_" in x and callable(x._target_):
                    try:
                        x._target_ = _convert_target_to_string(x._target_)
                    except AttributeError:
                        pass

            # not necessary, but makes yaml looks nicer
            _visit_dict_config(cfg, _replace_type_by_name)

        try:
            with PathManager.open(filename, "w") as f:
                OmegaConf.save(cfg, f)
        except Exception:
            logger.exception("Unable to serialize the config to yaml. Error:")
            new_filename = filename + ".pkl"
            try:
                # retry by pickle
                with PathManager.open(new_filename, "wb") as f:
                    cloudpickle.dump(cfg, f)
                logger.warning(
                    f"Config saved using cloudpickle at {new_filename} ...")
            except Exception:
                pass
Example #40
    def _calc_R(self, name, lambdify=True):
        """Uses Sympy to generate the rotation matrix for a joint or link

        Parameters
        ----------
        name : string
            name of the joint, link, or end-effector
        lambdify : boolean, optional (Default: True)
            if True returns a function to calculate the matrix.
            If False returns the Sympy matrix
        """
        R = None
        R_func = None
        filename = name + "_R"

        # check to see if we have the rotation matrix saved in file
        R, R_func = self._load_from_file(filename, lambdify=True)

        if R is None and R_func is None:
            # if no saved file was loaded, generate function
            print("Generating rotation matrix function.")
            R = self._calc_T(name=name)[:3, :3]

            # save to file
            abr_control.utils.os_utils.makedirs(
                "%s/%s" % (self.config_folder, filename)
            )
            cloudpickle.dump(
                sp.Matrix(R),
                open("%s/%s/%s" % (self.config_folder, filename, filename), "wb"),
            )

        if R_func is None:
            R_func = self._generate_and_save_function(
                filename=filename, expression=R, parameters=self.q
            )
        return R_func
Example #41
    def persist(self, path, persistor=None, create_unique_subfolder=True):
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

        if create_unique_subfolder:
            dir_name = os.path.join(path, "model_" + timestamp)
            os.mkdir(dir_name)
        else:
            dir_name = path

        data_file = os.path.join(dir_name, "training_data.json")
        classifier_file = os.path.join(dir_name, "intent_classifier.dat")
        entity_extractor_file = os.path.join(dir_name, "entity_extractor.dat")
        entity_synonyms_file = os.path.join(
            dir_name,
            "index.json") if self.training_data.entity_synonyms else None

        write_training_metadata(dir_name, timestamp, data_file,
                                MITIE_SKLEARN_BACKEND_NAME, 'en',
                                classifier_file, entity_extractor_file,
                                entity_synonyms_file, self.fe_file)

        with open(data_file, 'w') as f:
            f.write(self.training_data.as_json(indent=2))

        if self.training_data.entity_synonyms:
            with open(entity_synonyms_file, 'w') as f:
                json.dump(self.training_data.entity_synonyms, f)

        if self.intent_classifier:
            with open(classifier_file, 'wb') as f:
                cloudpickle.dump(self.intent_classifier, f)

        self.entity_extractor.save_to_disk(entity_extractor_file,
                                           pure_model=True)

        if persistor is not None:
            persistor.send_tar_to_s3(dir_name)
Example #42
def pickle_model(fold_output_path, trained_workflow, model_name='model.pkl'):
    """Pickle and reload trained workflow.

    If workflow can't be pickled, print warning and return original workflow.

    Parameters
    ----------
    fold_output_path : str
        the path into which the model will be pickled
    trained_workflow : a rampwf.workflow
        the workflow to be pickled
    model_name : str (default='model.pkl')
        the file name of the pickled workflow
    Returns
    -------
    trained_workflow : a rampwf.workflow
        either the input workflow or the pickled and reloaded workflow
    """
    msg = "Warning: model can't be pickled."
    model_file = os.path.join(fold_output_path, model_name)
    try:
        with open(model_file, 'wb') as pickle_file:
            cloudpickle.dump(trained_workflow, pickle_file)
    except pickle.PicklingError as e:
        print_warning(msg)
        print_warning(e)
        return trained_workflow
    else:
        # check if dumped trained_workflow can be loaded
        try:
            with open(model_file, 'rb') as pickle_file:
                trained_workflow = cloudpickle.load(pickle_file)
        except Exception as e:
            print_warning(msg)
            print_warning(e)

    return trained_workflow
Example #43
def test_run_flow(monkeypatch):
    file_path = os.path.dirname(prefect.environments.execution.dask.k8s.__file__)
    environment = KubernetesJobEnvironment(path.join(file_path, "job.yaml"))

    flow_runner = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.get_default_flow_runner_class",
        MagicMock(return_value=flow_runner),
    )

    with tempfile.TemporaryDirectory() as directory:
        with open(os.path.join(directory, "flow_env.prefect"), "w+") as env:
            flow = prefect.Flow("test")
            flow_path = os.path.join(directory, "flow_env.prefect")
            with open(flow_path, "wb") as f:
                cloudpickle.dump(flow, f)

        with set_temporary_config({"cloud.auth_token": "test"}):
            with prefect.context(
                flow_file_path=os.path.join(directory, "flow_env.prefect")
            ):
                environment.run_flow()

        assert flow_runner.call_args[1]["flow"].name == "test"
Example #44
 def _finalize(self, link_as=None, name_hint=None):
     log.debug(f'finalize hint={name_hint} link_as={link_as} {self._state}')
     if link_as and self._state == 'spun_down':
         self.hooks.save.in_reverse()
         temp_path = self.path
         self.path = temp.unique_dir(self._parent_path, hint=name_hint)
         log.debug(f'saving to temp {temp_path}')
         self._state = 'saving'
         self.expiration.depend_on_loaded_python_modules()
         self.log.finalize()
         with open(os.path.join(temp_path, 'machine.clpickle'), 'wb') as f:
             cloudpickle.dump(self, f)
         log.debug(f'moving {temp_path} to {self.path}')
         os.rename(temp_path, self.path)
         self._state = 'saved'
         link_this = self.path
     else:
         self.log.finalize()
         assert self._state in ('spun_down', 'loaded', 'dropped')
         log.info(f'discarding {self.path}')
         # TODO: track whether the step will be transient-last and reflink?
         with open(os.path.join(self.path, 'log.txt')) as f:
             self.log_contents = f.read()
         temp.remove(self.path)
         link_this = self._parent_path
         self._state = 'dropped'
     if (link_this and link_as
             and os.path.realpath(link_as) != os.path.realpath(link_this)):
         log.debug(f'linking {link_this} to {link_as}')
         if os.path.lexists(link_as):
             if os.path.exists(link_as) and not needs_a_rebuild(link_as):
                 log.critical(f'Refusing to overwrite fresh {link_as}')
                 raise RuntimeError(f'Not overriding fresh {link_as}')
             os.unlink(link_as)
         os.symlink(link_this, link_as)
         return link_as
Example #45
 def save(self, directory: str):
     shutil.copyfile(self.model_file, os.path.join(directory, "dan.pt"))
     shell(f"rm -f {self.model_file}")
     with open(os.path.join(directory, "dan.pkl"), "wb") as f:
         cloudpickle.dump(
             {
                 "page_field": self.page_field,
                 "combined_text_field": self.text_field,
                 "unigram_text_field": self.unigram_field,
                 "bigram_text_field": self.bigram_field,
                 "trigram_text_field": self.trigram_field,
                 "combined_ngrams": self.combined_ngrams,
                 "unigrams": self.unigrams,
                 "bigrams": self.bigrams,
                 "trigrams": self.trigrams,
                 "combined_max_vocab_size": self.combined_max_vocab_size,
                 "unigram_max_vocab_size": self.unigram_max_vocab_size,
                 "bigram_max_vocab_size": self.bigram_max_vocab_size,
                 "trigram_max_vocab_size": self.trigram_max_vocab_size,
                 "qanta_id_field": self.qanta_id_field,
                 "n_classes": self.n_classes,
                 "gradient_clip": self.gradient_clip,
                 "n_hidden_units": self.n_hidden_units,
                 "n_hidden_layers": self.n_hidden_layers,
                 "nn_dropout": self.nn_dropout,
                 "batch_size": self.batch_size,
                 "use_wiki": self.use_wiki,
                 "n_wiki_sentences": self.n_wiki_sentences,
                 "wiki_title_replace_token": self.wiki_title_replace_token,
                 "lowercase": self.lowercase,
                 "pooling": self.pooling,
                 "random_seed": self.random_seed,
                 "config_num": self.config_num,
             },
             f,
         )
Example #46
    def map(self, parallel_task, args):
        uid = 'slurmc.' + str(uuid4())
        outd = os.path.join(self.tmp_dir, uid)
        os.makedirs(outd)

        batches = list(split_evenly(args, self.max_tasks))

        sfile = os.path.join(outd, 'exdata.cloudpickle')
        with open(sfile, 'wb') as f:
            cloudpickle.dump(
                {
                    'f': parallel_task,
                    'args': batches,
                    'outd': outd
                }, f)

        self.template = parse_template(self.template, sfile=sfile)

        n_tasks = len(batches)

        ar = self.poll_loop(n_tasks, outd)
        for i in tqdm(ar, desc="(SLURM)", total=n_tasks):
            pass
        shutil.rmtree(outd)
Example #47
def plot_pitch_slider(event_start_frame=LIV_GOALS[MATCH]['START'],
                      event_end_frame=LIV_GOALS[MATCH]['END'],
                      field_dimen=(106.0, 68.0), data=None, frame=None):
    tracking_frames = range(event_start_frame, event_end_frame+1)
    # make figure
    fig_dict = {
        "data": [],
        "layout": {},
        "frames": []
    }
    fig_dict['layout'] = generate_pitch_layout(
        tracking_frames)
    fig_dict['data'] = generate_data_for_frame(frame_num=event_start_frame)
    fig_dict['frames'] = generate_plotly_frames_for_event(tracking_frames)

    # fig = go.Figure(fig_dict)

    # fig.show()

    # https://anvil.works/forum/t/serialization-of-graph-objects/4134/2
    with open('../datahub/lastrow/{}_fig_dict_white.pickle'.format(LIV_GOALS[MATCH]['PLAY']), 'wb') as handle:
        cloudpickle.dump(fig_dict, handle,
                         protocol=cloudpickle.DEFAULT_PROTOCOL)
        handle.close()
Example #48
    def subrun(self):
        with unique_tempdir() as tmp_f:
            with open(os.path.join(tmp_f, "input"), "w") as f:
                cloudpickle.dump((self.func, self.args), f)
            server = '''
import sys
import cloudpickle
import StringIO
input_ = StringIO.StringIO(sys.stdin.read())
stdout = sys.stdout
sys.stdout = StringIO.StringIO()
input_.seek(0)
(func, args) = cloudpickle.load(input_);
res = func(args)
for line in res:
    stdout.write(line)
                '''

            cmd = '''cd {cwd} && cat {input} | OMP_NUM_THREADS=1 nice -n +19 python -c "{server}" > {output} '''.format(
                server=server,
                input=os.path.join(tmp_f, "input"),
                output=os.path.join(tmp_f, "output"),
                cwd=os.path.dirname(os.path.abspath(__file__)))
            process = subprocess.Popen("/bin/bash -c '{}'".format(
                cmd.replace("'", "'\\''")),
                                       shell=True)
            while process.poll() is None:
                if self.terminate:
                    process.kill()
                time.sleep(1)
            nothing, err = process.communicate()
            retcode = process.poll()
            if retcode and not self.terminate:
                raise ProcessError(retcode, None, err)
            if self.result_file is not None:
                shutil.copy(os.path.join(tmp_f, "output"), self.result_file)
Example #49
def save_itr_params(itr, params, use_cloudpickle=True, pkl_prefix=''):
    if _snapshot_dir:
        if _snapshot_mode == 'all':
            file_name = osp.join(get_snapshot_dir(),
                                 pkl_prefix + 'itr_%d.pkl' % itr)
        elif _snapshot_mode == 'last':
            # override previous params
            file_name = osp.join(get_snapshot_dir(), pkl_prefix + 'params.pkl')
        elif _snapshot_mode == "gap":
            if itr == 0 or (itr + 1) % _snapshot_gap == 0:
                file_name = osp.join(get_snapshot_dir(),
                                     pkl_prefix + 'itr_%d.pkl' % itr)
            else:
                return
        elif _snapshot_mode == 'none':
            return
        else:
            raise NotImplementedError
        if use_cloudpickle:
            import cloudpickle
            with open(file_name, 'wb') as f:
                cloudpickle.dump(params, f, protocol=3)
        else:
            joblib.dump(params, file_name, compress=3)
Example #50
    def dump_trajectories(self, force=False):
        """Dumps trajectories in a new shard.

        Should be called at most once per epoch.

        Args:
          force: (bool) Whether to complete unfinished trajectories and create
            a new shard even if we have not reached the minimum size.
        """
        if self.trajectory_dump_dir is None:
            return
        gfile.makedirs(self.trajectory_dump_dir)

        trajectories = self.train_env.trajectories
        if force:
            trajectories.complete_all_trajectories()

        # complete_all_trajectories() also adds trajectories that were just reset.
        # We don't want them since they have just the initial observation and no
        # actions, so we filter them out.
        def has_any_action(trajectory):
            return (trajectory.time_steps
                    and trajectory.time_steps[0].action is not None)

        self._trajectory_buffer.extend(
            filter(has_any_action, trajectories.completed_trajectories))

        trajectories.clear_completed_trajectories()
        ready = (len(self._trajectory_buffer) >=
                 self._trajectory_dump_min_count_per_shard)
        if ready or force:
            shard_path = os.path.join(self.trajectory_dump_dir,
                                      "{}.pkl".format(self.epoch))
            with gfile.GFile(shard_path, "wb") as f:
                pickle.dump(self._trajectory_buffer, f)
            self._trajectory_buffer = []
Example #51
def main():
    args = parse_args()
    setup_logging(args.logfile)

    log = get_logger()

    assert (0 <= args.hidden_fraction <= 1)

    np.random.seed(args.random_seed)
    tf.set_random_seed(args.random_seed)
    log.info('*' * 100)
    log.info('[Starting MC experiment]')
    log_dict(log.info, vars(args))
    log.info('[Loading target GIs]')
    with open(args.target_gis, 'rb') as f:
        tgt_gis = cpkl.load(f)

    log.info('[Loading source GIs]')
    with open(args.source_gis, 'rb') as f:
        src_gis = cpkl.load(f)

    log.info('[Loading sim scores]')
    with open(args.sim_scores, 'rb') as f:
        sim_scores_data = cpkl.load(f)
    sim_scores = sim_scores_data['values']
    sim_scores = sim_scores / np.max(sim_scores)  # Normalize

    # log.info('\t- %d scores', len(sim_scores))

    hp_param_space = xsmf_param_space(args)

    results, models, training_curves, trials = \
        run_xsmf_experiment(tgt_gis=tgt_gis,
                            src_gis=src_gis,
                            space=hp_param_space,
                            sim_scores=sim_scores,
                            val_hf=args.val_hidden_fraction,
                            test_hf=args.hidden_fraction,
                            n_repeats=args.n_repeats,
                            hp_iters=args.n_hyperopt_iters,
                            hp_seed=args.random_seed)
    # Save results and other information
    log_results(results['summary'])
    with open(args.results_output, 'w') as f:
        json.dump(results, f, indent=2)

    with open(args.training_curve_output, 'wb') as f:
        cpkl.dump(training_curves, f)

    # TODO: save the models; they cannot be pickled at the moment.
    # We will need to implement a from_dict and a to_dict method first.
    with open(args.models_output, 'wb') as f:
        cpkl.dump(trials, f)

    with open(args.trials_output, 'wb') as f:
        cpkl.dump(trials, f)
Example #52
def _main():
    parser = argparse.ArgumentParser(description='Encodes images in a directory.',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(dest='images_dir', type=str, help='The images directory.')
    parser.add_argument(dest='model_path', type=str, help='The path to the Keras model.')
    parser.add_argument(dest='output_file', type=str, help='The output .pickle file.')
    parser.add_argument('--image-size', type=int, default=224, help='The image size.')
    parser.add_argument('--only-hor', action='store_true', default=False, help='Only do horizontal flips.')
    parser.add_argument('--fill', action='store_true', default=False, help='Zoom to fill letterbox if the image is small.')
    args = parser.parse_args()

    images = os.listdir(args.images_dir)
    full_paths = [os.path.join(args.images_dir, img) for img in images]
    embeddings = encode_images(
        model_path=args.model_path,
        images=full_paths,
        letterbox_size=args.image_size,
        verbose=True,
        onlyhor=args.only_hor,
        fill=args.fill
    )

    with open(args.output_file, 'wb') as of:
        cloudpickle.dump(embeddings, of)
Example #53
def _save_model(sk_model, output_path, serialization_format, protocol=None):
    """
    :param sk_model: The scikit-learn model to serialize.
    :param output_path: The file path to which to write the serialized model.
    :param serialization_format: The format in which to serialize the model. This should be one of
                                 the following: ``mlflow.sklearn.SERIALIZATION_FORMAT_PICKLE`` or
                                 ``mlflow.sklearn.SERIALIZATION_FORMAT_CLOUDPICKLE``.
    :param protocol: The pickle protocol version. If ``None``, the default protocol version
                     from cloudpickle will be used.
    """
    with open(output_path, "wb") as out:
        if serialization_format == SERIALIZATION_FORMAT_PICKLE:
            pickle.dump(sk_model, out)
        elif serialization_format == SERIALIZATION_FORMAT_CLOUDPICKLE:
            import cloudpickle

            cloudpickle.dump(sk_model, out, protocol)
        else:
            raise MlflowException(
                message=
                "Unrecognized serialization format: {serialization_format}".
                format(serialization_format=serialization_format),
                error_code=INTERNAL_ERROR,
            )
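
Both branches above produce a standard pickle stream, so reading the file back does not require knowing which serialization format was used. A minimal sketch of a loader, assuming the file was written by _save_model above (the name _load_model is illustrative, not mlflow's actual loader):

import pickle

def _load_model(input_path):
    # cloudpickle output is readable by the standard pickle module,
    # so one code path covers both serialization formats.
    with open(input_path, "rb") as f:
        return pickle.load(f)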
Example #54
    def subrun(self):
        with unique_tempdir() as tmp_f:
            with open(os.path.join(tmp_f, "input"), "w") as f:
                cloudpickle.dump((self.func, self.args), f)
            server = '''
import sys
import cloudpickle
import StringIO
input_ = StringIO.StringIO(sys.stdin.read())
stdout = sys.stdout
sys.stdout = StringIO.StringIO()
input_.seek(0)
(func, args) = cloudpickle.load(input_);
res = func(args)
for line in res:
    stdout.write(line)
                '''

            cmd = '''cd {cwd} && cat {input} | OMP_NUM_THREADS=1 nice -n +19 python -c "{server}" > {output} '''.format(
                    server=server,
                    input=os.path.join(tmp_f, "input"),
                    output=os.path.join(tmp_f, "output"),
                    cwd=os.path.dirname(os.path.abspath(__file__))
            )
            process = subprocess.Popen("/bin/bash -c '{}'".format(cmd.replace("'", "'\\''")),
                                       shell=True)
            while process.poll() is None:
                if self.terminate:
                    process.kill()
                time.sleep(1)
            nothing, err = process.communicate()
            retcode = process.poll()
            if retcode and not self.terminate:
                raise ProcessError(retcode, None, err)
            if self.result_file is not None:
                shutil.copy(os.path.join(tmp_f, "output"), self.result_file)
Beispiel #55
0
def get_env(version='v1.0-mini', env_path=None, save_pkl_path=None, render_bev=True, config={}):
    if env_path is not None:
        t = time.time()
        with open(env_path, 'rb') as f:
            env = cloudpickle.load(f)
        print(f"env load time: {time.time()-t}")
    else:
        env_config = config
        env_config['config']['NuScenesAgent_config']['version'] = version    
        env = class_from_path(env_config['type'])(env_config['config'])
    
    if not render_bev:
        env.config['render_type'] = []
    
    if 'pedestrian' in env.graphics.plot_list:
        env.graphics.plot_list.remove('pedestrian')
    if 'map_info' in env.graphics.plot_list:
        env.graphics.plot_list.remove('map_info')
    if 'cam' in env.graphics.plot_list:
        env.graphics.plot_list.remove('cam')

    if save_pkl_path is not None:
        with open(save_pkl_path, 'wb') as f:
            cloudpickle.dump(env, f)
        
    return env
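A usage sketch of the caching pattern above; the config dict, class path, and pickle path are placeholders, and class_from_path comes from the surrounding project:

# First call: build the environment from config and cache it to disk.
env = get_env(version='v1.0-mini',
              save_pkl_path='/tmp/nuscenes_env.pkl',
              config={'type': 'path.to.EnvClass',                 # placeholder
                      'config': {'NuScenesAgent_config': {}}})

# Later calls: skip the expensive construction and reload the cached pickle.
env = get_env(env_path='/tmp/nuscenes_env.pkl')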
Beispiel #56
0
    def add_flow(self, flow: "Flow") -> str:
        """
        Method for storing a new flow as bytes in the local filesystem.

        Args:
            - flow (Flow): a Prefect Flow to add

        Returns:
            - str: the location of the newly added flow in this Storage object

        Raises:
            - ValueError: if a flow with the same name is already contained in this storage
        """
        if flow.name in self:
            raise ValueError(
                'Name conflict: Flow with the name "{}" is already present in this storage.'
                .format(flow.name))

        flow_location = os.path.join(self.directory,
                                     "{}.prefect".format(slugify(flow.name)))
        with open(flow_location, "wb") as f:
            cloudpickle.dump(flow, f)
        self.flows[flow.name] = flow_location
        return flow_location
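Because add_flow writes the flow with a plain cloudpickle.dump, the stored file can be read back with cloudpickle.load; a minimal round-trip sketch, assuming storage is an instance of this class and flow is any Prefect Flow:

location = storage.add_flow(flow)

with open(location, "rb") as f:
    restored = cloudpickle.load(f)

assert restored.name == flow.name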
Beispiel #57
0
    def save(self, directory: str):
        shutil.copyfile(self.model_file, os.path.join(directory, 'rnn.pt'))
        shell(f'rm -f {self.model_file}')
        with open(os.path.join(directory, 'rnn.pkl'), 'wb') as f:
            cloudpickle.dump(
                {
                    'page_field': self.page_field,
                    'text_field': self.text_field,
                    'qanta_id_field': self.qanta_id_field,
                    'n_classes': self.n_classes,
                    'gradient_clip': self.gradient_clip,
                    'n_hidden_units': self.n_hidden_units,
                    'n_hidden_layers': self.n_hidden_layers,
                    'lr': self.lr,
                    'nn_dropout': self.nn_dropout,
                    'sm_dropout': self.sm_dropout,
                    'batch_size': self.batch_size,
                    'use_wiki': self.use_wiki,
                    'n_wiki_sentences': self.n_wiki_sentences,
                    'wiki_title_replace_token': self.wiki_title_replace_token,
                    'lowercase': self.lowercase,
                    'random_seed': self.random_seed,
                    'config_num': self.config_num
                }, f)
Beispiel #58
0
def save_table(data_frame, file_path, metadata_ext='.pklmetadata'):
    """
    Saves a DataFrame to disk along with its metadata in a pickle format.

    This function saves a DataFrame to disk along with its metadata from
    the catalog.

    Specifically, this function saves the DataFrame in the given
    file path, and saves the metadata in the same directory (as the
    file path) but with a different extension. This extension can be
    optionally given by the user (defaults to '.pklmetadata').

    Args:
        data_frame (DataFrame): The DataFrame that should be saved.

        file_path (string): The file path where the DataFrame must be stored.

        metadata_ext (string): The metadata extension that should be used while
            storing the metadata information. The default value is
            '.pklmetadata'.

    Returns:
        A Boolean value of True is returned if the DataFrame is successfully
        saved.

    Raises:
        AssertionError: If `data_frame` is not of type pandas
         DataFrame.
        AssertionError: If `file_path` is not of type string.
        AssertionError: If `metadata_ext` is not of type string.
        AssertionError: If a file cannot be written in the given `file_path`.

    Examples:

        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
        >>> em.save_table(A, './A.pkl') # will store two files ./A.pkl and ./A.pklmetadata

        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
        >>> em.save_table(A, './A.pkl', metadata_ext='.pklmeta') # will store two files ./A.pkl and ./A.pklmeta


    See Also:
        :meth:`~py_entitymatching.load_table`

    Note:
        This function is a bit different from to_csv_metadata, where the
        DataFrame is stored in a CSV file format. The CSV file format can be
        viewed using a text editor. But a DataFrame stored using 'save_table' is
        stored in a special format, which cannot be viewed with a text editor.
        The reason we have save_table is that, for larger DataFrames, it is
        more efficient to pickle the DataFrame to disk than to write it
        in CSV format.
    """
    # Validate the input parameters

    validate_object_type(data_frame, pd.DataFrame)

    validate_object_type(file_path, six.string_types, error_prefix='Input file path')

    validate_object_type(metadata_ext, six.string_types, error_prefix='Input Metadata ext')

    # Get the file_name (without extension) and the extension from the given
    # file path. For example, if the file_path was /Users/foo/file.csv then
    # the file_name will be /Users/foo/file and the extension will be '.csv'.
    file_name, _ = os.path.splitext(file_path)

    # The metadata file name is the same file name but with the extension
    # given by the user
    metadata_filename = file_name + metadata_ext

    # Check if the file exists in the file_path and whether we have
    # sufficient access privileges to write in that path
    can_write, file_exists = ps._check_file_path(file_path)

    if can_write:
        # If the file already exists then issue a warning and overwrite the
        # file
        if file_exists:
            logger.warning(
                'File already exists at %s; Overwriting it', file_path)
            # We open the file_path in binary mode, as we are writing in
            # binary format.
            with open(file_path, 'wb') as file_handler:
                cloudpickle.dump(data_frame, file_handler)
        else:
            # The file does not exist yet; simply write it.
            with open(file_path, 'wb') as file_handler:
                cloudpickle.dump(data_frame, file_handler)
    else:
        # Looks like we cannot write the file in the given path. Raise an
        # error in this case.
        logger.error('Cannot write in the file path %s; Exiting', file_path)
        raise AssertionError('Cannot write in the file path %s' % file_path)

    # Once we are done with writing the DataFrame, we will write the metadata
    #  now

    # Initialize a metadata dictionary to hold the metadata of DataFrame from
    #  the catalog
    metadata_dict = collections.OrderedDict()

    # get all the properties for the input data frame
    # # Check if the DataFrame information is present in the catalog
    properties = {}
    if cm.is_dfinfo_present(data_frame) is True:
        properties = cm.get_all_properties(data_frame)

    # If the properties are present in the catalog, then write properties to
    # disk
    if len(properties) > 0:
        for property_name, property_value in six.iteritems(properties):
            if isinstance(property_value, six.string_types) is True:
                metadata_dict[property_name] = property_value

    # try to save metadata
    can_write, file_exists = ps._check_file_path(metadata_filename)
    if can_write:
        # If the file already exists, then issue a warning and overwrite the
        # file
        if file_exists:
            logger.warning(
                'Metadata file already exists at %s. Overwriting it',
                metadata_filename)
            # write metadata contents
            with open(metadata_filename, 'wb') as file_handler:
                cloudpickle.dump(metadata_dict, file_handler)
        else:
            # write metadata contents
            with open(metadata_filename, 'wb') as file_handler:
                cloudpickle.dump(metadata_dict, file_handler)
    else:
        logger.warning(
            'Cannot write metadata at the file path %s. Skip writing metadata '
            'file', metadata_filename)

    return True
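The two files written by save_table are ordinary pickles; load_table is the proper counterpart, but for illustration they can be read back directly (paths follow the docstring example):

import pickle

with open('./A.pkl', 'rb') as f:
    data_frame = pickle.load(f)          # the pickled DataFrame

with open('./A.pklmetadata', 'rb') as f:
    metadata = pickle.load(f)            # OrderedDict of string-valued properties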
Beispiel #59
0
def save_object(object_to_save, file_path):
    """
    Saves a Python object to disk.

    This function is intended to be used to save py_entitymatching objects such as
    rule-based blocker, feature vectors, etc. A user would like to store
    py_entitymatching objects to disk, when he/she wants to save the workflow and
    resume it later. This function provides a way to save the required
    objects to disk.

    This function takes in the object to save and the file path. It pickles
    the object and stores it in the file path specified.

    Args:
        object_to_save (Python object): The Python object to save. This can be
            a rule-based blocker, feature vectors, etc.

        file_path (string): The file path where the object must be saved.


    Returns:
        A Boolean value of True is returned, if the saving was successful.

    Raises:
        AssertionError: If `file_path` is not of type string.
        AssertionError: If a file cannot be written in the given `file_path`.

    Examples:

        >>> import pandas as pd
        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
        >>> rb = em.RuleBasedBlocker()
        >>> block_f = em.get_features_for_blocking(A, B)
        >>> rule1 = ['colA_colA_lev_dist(ltuple, rtuple) > 3']
        >>> rb.add_rule(rule1)
        >>> em.save_object(rb, './rule_blocker.pkl')



    See Also:
        :meth:`~load_object`

    """
    # Validate input parameters

    validate_object_type(file_path, six.string_types, 'Input file path')

    # Check whether the file path is valid and if a file is already present
    # at that path.
    # noinspection PyProtectedMember
    can_write, file_exists = ps._check_file_path(file_path)

    # Check whether we can write
    if can_write:
        # If a file already exists in that location, issue a warning and
        # overwrite the file.
        if file_exists:
            logger.warning(
                'File already exists at %s; Overwriting it', file_path)
            # we open the file in 'wb' mode as we are writing a binary file.
            with open(file_path, 'wb') as file_handler:
                cloudpickle.dump(object_to_save, file_handler)
        else:
            with open(file_path, 'wb') as file_handler:
                cloudpickle.dump(object_to_save, file_handler)

    # If we cannot write, then raise an error.
    else:
        logger.error('Cannot write in the file path %s; Exiting', file_path)
        raise AssertionError('Cannot write in the file path %s' % file_path)

    # Return True if everything was successful.
    return True
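The matching read side is a single pickle load; load_object is the library counterpart, but a minimal sketch looks like this (the path follows the docstring example, and cloudpickle must be installed so its reconstruction helpers can be imported):

import pickle

with open('./rule_blocker.pkl', 'rb') as f:
    rb = pickle.load(f)   # the rule-based blocker saved above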
Beispiel #60
0
def main():

  args = argparser.parse_args()
  n_pts = int(args.points)

  print('Loading data')
  with open(os.path.abspath(args.in_file), 'rb') as f:
    data = cloudpickle.load(f)

  print('subtracting mean')
  time_arr = data['time_arr'] - np.mean(data['time_arr'])
  gyr_arr = data['gyr_arr']   - np.tile( np.mean(data['gyr_arr'],  axis=1).reshape((3,1)), (1,data['gyr_arr'].shape[1]) )
  acc_arr = data['acc_arr']   - np.tile( np.mean(data['acc_arr'],  axis=1).reshape((3,1)), (1,data['acc_arr'].shape[1]) )
  
  # M: number of axes
  # N: number of epochs
  if acc_arr.shape != gyr_arr.shape:
    raise Exception('different sizes')
  M, N = gyr_arr.shape

  # automate this?
  print('Computing mean dt')
  t0 = np.mean(np.diff(time_arr))
  fs = np.float64(1.0)/t0

  n = np.power(2, np.arange(np.floor(np.log2(N/2.))))
  end_log_inc = np.log10(n[-1])
  m = shared_from_array( np.unique(np.ceil(np.logspace(0, end_log_inc, n_pts))).astype(np.int64) )
  T = m*t0

  if (T < 0).any():
    print('T < 0')
    set_trace()

  # setup input/output shared memory arrays
  theta_gyr = shared_from_array( np.cumsum(gyr_arr, axis=1) )
  theta_acc = shared_from_array( np.cumsum(acc_arr, axis=1) )
  sigma2_gyr = shared_from_array( np.zeros((M, len(m))) )
  sigma2_acc = shared_from_array( np.zeros((M, len(m))) )

  # shared memory/serialization workaround: define calculation functions here so
  # that the shared memory arrays are in scope

  def adev_at_tau(i):
    """worker function for parallelization. first part of the Allan deviation 
    equation.
    There is potentially a way to do the Allan deviation calculation without any
    for loop whatsoever, but I haven't figured it out yet. It would require 2D
    array indexing in NumPy.
    """
    k = range(N - 2*m[i])
    sigma2_gyr[:,i] = np.sum( np.power( theta_gyr[:,k+2*m[i]] - 2*theta_gyr[:,k+m[i]] + theta_gyr[:,k] , 2 ), axis=1)
    sigma2_acc[:,i] = np.sum( np.power( theta_acc[:,k+2*m[i]] - 2*theta_acc[:,k+m[i]] + theta_acc[:,k] , 2 ), axis=1)


  def adev_at_tau_wrapper(idxs):
    if idxs[0] == 0:
      for i in trange(len(idxs)):
        adev_at_tau(idxs[i])
    else:
      for i in idxs:
        adev_at_tau(i)


  print('creating procs')
  idx_chunks = chunk(range(len(m)), int(args.cores))
  procs = [multiprocessing.Process(target=adev_at_tau_wrapper, args=(ichnk,)) for ichnk in idx_chunks]
  print('# chunks: ', len(procs))
  for proc in procs:
    proc.start()
  for proc in procs:
    proc.join()

  div = np.tile(2*np.multiply(np.power(T,2), N-2*m), (M,1))
  sigma2_gyr = np.divide(sigma2_gyr, div)
  sigma2_acc = np.divide(sigma2_acc, div)
  sigma_gyr = np.sqrt(sigma2_gyr)
  sigma_acc = np.sqrt(sigma2_acc)

  data_dir, in_name = os.path.split(os.path.abspath(args.in_file))
  set_name, ext = in_name.split(os.extsep)
  out_file_name = os.path.join(data_dir, set_name+'_adev'+os.extsep+ext)
  print('saving to: ', out_file_name)
  with open(out_file_name, 'wb') as f:
    cloudpickle.dump(
      {
        'T': T,
        'sigma2_gyr': sigma2_gyr,
        'sigma2_acc': sigma2_acc,
        'sigma_gyr': sigma_gyr,
        'sigma_acc': sigma_acc,
      },
      f, -1
    )
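For reference, the worker above accumulates the squared second differences sum_k (theta[k+2m] - 2*theta[k+m] + theta[k])^2, and `div` later applies the overlapping Allan variance normalization 1/(2*tau^2*(N-2m)) with tau = m*t0. Below is a single-axis, single-process sketch of the textbook estimator; note it integrates the rate by multiplying the cumulative sum by t0, whereas the script above works with the raw cumulative sum in its own units:

import numpy as np

def overlapping_allan_variance(omega, t0, m_values):
    """Textbook overlapping Allan variance of a 1-D rate signal sampled every t0 seconds."""
    theta = np.cumsum(omega) * t0                      # integrated signal
    N = theta.size
    sigma2 = np.empty(len(m_values))
    for i, m in enumerate(m_values):
        # Second differences of the integrated signal at cluster size m.
        d = theta[2 * m:] - 2.0 * theta[m:N - m] + theta[:N - 2 * m]
        sigma2[i] = np.sum(d ** 2) / (2.0 * (m * t0) ** 2 * (N - 2 * m))
    return sigma2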