Ejemplo n.º 1
0
    def yield_result(self, value, output_name='result'):
        """Record ``value`` under ``output_name`` via ``pm.record``.

        When no solid definition is attached, the serialized value is recorded
        directly. Otherwise the named output must exist on the solid, and the
        value is passed through that output's ``dagster_type.evaluate_value``
        before being serialized and recorded.

        Args:
            value: The object to emit as a notebook output.
            output_name: Name of the output to record under.

        Returns:
            Whatever ``pm.record`` returns.

        Raises:
            DagstermillError: If the solid has no output called
                ``output_name``, or (chained from the original error) if a
                ``DagsterEvaluateValueError`` is raised while evaluating and
                recording the value.
        """
        solid_def = self.solid_def
        if not solid_def:
            return pm.record(output_name, serialize_dm_object(value))

        if not solid_def.has_output(output_name):
            missing_output_msg = (
                'Solid {solid_name} does not have output named {output_name}'
            ).format(solid_name=solid_def.name, output_name=output_name)
            raise DagstermillError(missing_output_msg)

        output_def = solid_def.output_def_named(output_name)

        try:
            evaluated = output_def.dagster_type.evaluate_value(value)
            return pm.record(output_name, serialize_dm_object(evaluated))
        except DagsterEvaluateValueError as de:
            type_check_msg = (
                'Solid {solid_name} output {output_name} output_type {output_type} '
                'failed type check on value {value}'
            ).format(
                solid_name=solid_def.name,
                output_name=output_name,
                output_type=output_def.dagster_type.name,
                value=repr(value),
            )
            # Chain the original evaluation error as the cause.
            raise_from(DagstermillError(type_check_msg), de)
Ejemplo n.º 2
0
    def yield_result(self, value, output_name):
        """Emit ``value`` as the solid output called ``output_name``.

        The output name is validated against the solid definition first. When
        the manager was not populated by papermill the value is simply handed
        back; otherwise it is written to ``output-<output_name>`` inside
        ``self.marshal_dir`` and the result of ``write_value`` is recorded.

        Args:
            value: The object to emit.
            output_name: Name of the output on the solid definition.

        Returns:
            ``value`` when not populated by papermill, otherwise ``None``.

        Raises:
            DagstermillError: If the solid has no output named ``output_name``.
        """
        solid_def = self.solid_def
        if not solid_def.has_output(output_name):
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'.format(
                    solid_name=solid_def.name,
                    output_name=output_name,
                )
            )

        if self.populated_by_papermill:
            output_type = solid_def.output_def_named(output_name).runtime_type
            target_path = os.path.join(
                self.marshal_dir, 'output-{}'.format(output_name)
            )
            written = write_value(output_type, value, target_path)
            pm.record(output_name, written)
            return None

        return value
Ejemplo n.º 3
0
    def yield_result(self, context_or_serialized, value, output_name='result'):
        """Emit ``value`` as the output ``output_name`` for the given context.

        The context is resolved through ``_get_cached_dagstermill_context``.
        For an ``InMemoryDagstermillContext`` the value is returned untouched.
        Otherwise the output name is validated against the context's solid
        definition, the value is marshalled to ``output-<output_name>`` in the
        context's ``marshal_dir``, and the marshalled result is recorded.

        Args:
            context_or_serialized: Passed straight to
                ``_get_cached_dagstermill_context``.
            value: The object to emit.
            output_name: Name of the output on the solid definition.

        Returns:
            ``value`` for in-memory contexts, otherwise ``None``.

        Raises:
            DagstermillError: If the solid has no output named ``output_name``.
        """
        context = self._get_cached_dagstermill_context(context_or_serialized)

        # In-memory contexts skip validation and marshalling entirely.
        if isinstance(context, InMemoryDagstermillContext):
            return value

        solid = context.solid_def
        if not solid.has_output(output_name):
            error_text = 'Solid {solid_name} does not have output named {output_name}'.format(
                solid_name=solid.name,
                output_name=output_name,
            )
            raise DagstermillError(error_text)

        output_type = solid.output_def_named(output_name).runtime_type
        target_path = os.path.join(
            context.marshal_dir, 'output-{}'.format(output_name)
        )
        marshalled = marshal_value(output_type, value, target_path)
        pm.record(output_name, marshalled)
Ejemplo n.º 4
0
    def yield_result(self, value, output_name):
        """Emit ``value`` as the solid output called ``output_name``.

        Outside of a papermill-populated run the value is returned as-is.
        Under papermill, the solid definition must be present and must declare
        the output; the value is then written to ``output-<output_name>`` in
        ``self.marshal_dir`` and the result of ``write_value`` is recorded.

        Args:
            value: The object to emit.
            output_name: Name of the output on the solid definition.

        Returns:
            ``value`` when not populated by papermill, otherwise ``None``.

        Raises:
            DagstermillError: If the solid has no output named ``output_name``.
        """
        if not self.populated_by_papermill:
            return value

        check.invariant(
            self.solid_def is not None,
            "If Dagstermill has been run by papermill, self.solid_def should not be None",
        )

        solid_def = self.solid_def
        if not solid_def.has_output(output_name):
            error_text = 'Solid {solid_name} does not have output named {output_name}'.format(
                solid_name=solid_def.name,
                output_name=output_name,
            )
            raise DagstermillError(error_text)

        output_type = solid_def.output_def_named(output_name).runtime_type
        target_path = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
        written = write_value(output_type, value, target_path)
        pm.record(output_name, written)
Ejemplo n.º 5
0
    def save(self, name, value, display=False):
        """Record ``value`` under ``name`` according to the output schema.

        The declared type is read from ``self.out[name]['type']``:

        * ``'Image'``: a string is wrapped in ``Image`` (the original code
          labels this case "filename"); an ``Image`` is converted with
          ``format_display_data`` and the resulting data is recorded.
        * ``'Array'`` with a NumPy array: the array is JSON-encoded with
          ``plotly.utils.PlotlyJSONEncoder`` and the string is recorded.
        * anything else: ``value`` is recorded unchanged.

        Type checks use ``isinstance`` so that subclasses (for example
        ``np.ndarray`` subclasses) take the matching branch instead of
        falling through to the raw ``pm.record`` call.

        Args:
            name: Key in the output schema ``self.out``.
            value: The object to record.
            display: When true, also show the value with ``idisplay``.

        Raises:
            ValueError: If ``name`` is not part of the output schema.
        """
        if name not in self.out:
            raise ValueError('\"%s\" not in output schema!' % name)
        otype = self.out[name]['type']

        if otype == 'Image':
            if isinstance(value, str):
                # filename
                value = Image(value)
            if isinstance(value, Image):
                if display:
                    idisplay(value)
                data, _metadata = IPython.core.formatters.format_display_data(
                    value)
                pm.record(name, data)
                return

        if display:
            idisplay(value)

        if otype == 'Array' and isinstance(value, np.ndarray):
            sval = json.dumps(value, cls=plotly.utils.PlotlyJSONEncoder)
            pm.record(name, sval)
            return

        pm.record(name, value)
Ejemplo n.º 6
0
    def yield_result(self, value, output_name):
        """Emit ``value`` as the solid output called ``output_name``.

        Outside of a papermill-populated run the value is returned as-is.

        With a solid definition available, the output name is validated
        against it and the value is written to ``output-<output_name>`` in
        ``self.marshal_dir`` via ``write_value``; that result is recorded.

        Without a solid definition, the output's entry in
        ``self.output_name_type_dict`` decides what happens: ``SCALAR`` values,
        and ``ANY`` values that are JSON-serializable, are recorded directly;
        ``PICKLE_SERIALIZABLE`` values are pickled to the marshal directory and
        the file path is recorded; anything else is rejected.

        Args:
            value: The object to emit.
            output_name: Name of the output.

        Returns:
            ``value`` when not populated by papermill, otherwise ``None``.

        Raises:
            DagstermillError: If the output name is unknown, or if the output
                cannot be handled without a solid definition.
        """
        if not self.populated_by_papermill:
            return value

        if self.solid_def is not None:
            solid_def = self.solid_def
            if not solid_def.has_output(output_name):
                raise DagstermillError(
                    'Solid {solid_name} does not have output named {output_name}'.format(
                        solid_name=solid_def.name,
                        output_name=output_name,
                    )
                )

            output_type = solid_def.output_def_named(output_name).runtime_type
            target_path = os.path.join(
                self.marshal_dir, 'output-{}'.format(output_name)
            )
            pm.record(output_name, write_value(output_type, value, target_path))
            return None

        # No solid definition: fall back to the serializable type table.
        if output_name not in self.output_name_type_dict:
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'.format(
                    solid_name=self.solid_def_name,
                    output_name=output_name,
                )
            )

        type_tag = self.output_name_type_dict[output_name]
        record_directly = type_tag == SerializableRuntimeType.SCALAR or (
            type_tag == SerializableRuntimeType.ANY and is_json_serializable(value)
        )

        if record_directly:
            pm.record(output_name, value)
        elif type_tag == SerializableRuntimeType.PICKLE_SERIALIZABLE:
            pickle_path = os.path.join(
                self.marshal_dir, 'output-{}'.format(output_name)
            )
            serialize_to_file(
                MANAGER_FOR_NOTEBOOK_INSTANCE.context,
                PickleSerializationStrategy(),
                value,
                pickle_path,
            )
            pm.record(output_name, pickle_path)
        else:
            raise DagstermillError(
                'Output Definition for output {output_name} requires repo registration '
                'since it has a complex serialization format'.format(
                    output_name=output_name
                )
            )
Ejemplo n.º 7
0
def save(name, value, display=False):
    """Record ``value`` under ``name`` with ``pm.record``.

    NumPy arrays are JSON-encoded with ``plotly.utils.PlotlyJSONEncoder``
    before recording. ``Video`` and ``Image`` objects are converted with
    ``format_display_data`` and the resulting data is recorded. Any other
    value is recorded unchanged.

    Type checks use ``isinstance`` so that subclasses (for example
    ``np.ndarray`` subclasses) take the matching branch instead of falling
    through to the raw ``pm.record`` call.

    Args:
        name: Key to record the value under.
        value: The object to record.
        display: When true, show the value with ``idisplay`` first.
    """
    if display:
        idisplay(value)

    if isinstance(value, np.ndarray):
        sval = json.dumps(value, cls=plotly.utils.PlotlyJSONEncoder)
        pm.record(name, sval)
        return

    if isinstance(value, (Video, Image)):
        data, _metadata = IPython.core.formatters.format_display_data(value)
        pm.record(name, data)
        return

    pm.record(name, value)
Ejemplo n.º 8
0
    download_deeprec_resources(mind_url, \
                               os.path.join(data_path, 'valid'), mind_dev_dataset)
# Download the utility resources only when the YAML config is not on disk yet.
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/newsrec/', \
                               os.path.join(data_path, 'utils'), mind_utils)

## Create hyper-parameters
# Built from the YAML config plus the embedding/dictionary files and epoch count.
hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, \
                          wordDict_file=wordDict_file, userDict_file=userDict_file,\
                          epochs=epochs)
print(hparams)

# The iterator class itself (not an instance) is handed to the model below.
iterator = MINDIterator

## Train the NPA model
model = NPAModel(hparams, iterator, seed=seed)
# Evaluate once before fitting — presumably as an untrained baseline.
print(model.run_eval(valid_news_file, valid_behaviors_file))

model.fit(train_news_file, train_behaviors_file, valid_news_file,
          valid_behaviors_file)

# Evaluate again after fitting and record the result under the key "res_syn".
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
print(res_syn)
pm.record("res_syn", res_syn)

## Save the model
# Create <data_path>/model if needed; exist_ok avoids an error when it already exists.
model_path = os.path.join(data_path, "model")
os.makedirs(model_path, exist_ok=True)

# Weights are written under the "npa_ckpt" name inside the model directory.
model.model.save_weights(os.path.join(model_path, "npa_ckpt"))
Ejemplo n.º 9
0
# Download the demo archive only when the YAML config is not on disk yet.
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/deeprec/', data_path1, 'mind-demo-dkn.zip')


## Create hyper-parameters
# Values defined here are passed to prepare_hparams alongside the YAML config.
epochs = 10
history_size = 50
batch_size = 100
hparams = prepare_hparams(yaml_file,
                          news_feature_file = news_feature_file,
                          user_history_file = user_history_file,
                          wordEmb_file=wordEmb_file,
                          entityEmb_file=entityEmb_file,
                          contextEmb_file=contextEmb_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)
print(hparams)


## Train the DKN model
model = DKN(hparams, DKNTextIterator)
# Evaluate on the validation file before fitting — presumably as an untrained baseline.
print(model.run_eval(valid_file))
model.fit(train_file, valid_file)


## Evaluate the DKN model
# Evaluate on the test file and record the result under the key "res".
res = model.run_eval(test_file)
print(res)
pm.record("res", res)
Ejemplo n.º 10
0
# Here we use the **start_date** and **stop_date** parameters, which are defined above by default, but can
# be overwritten at runtime by papermill.

# %%
# Select the rows between the two bounds using label-based slicing on `data`.
data_highlight = data.loc[start_date:stop_date]

# %% [markdown]
# We use the `pm.record()` function to keep track of how many records were included in the
# highlighted section. This lets us inspect this value after running the notebook with papermill.
#
# We also raise a ValueError if we've got a bug in the start/stop times, which will be captured
# and displayed by papermill if it's triggered.

# %%
num_records = len(data_highlight)
# Record the count before validating it, so it is recorded even when the check below raises.
pm.record('num_records', num_records)
if num_records == 0:
    raise ValueError(
        "I have no data to highlight! Check that your dates are correct!")

# %% [markdown]
# ## Make our plot
#
# Below we'll generate a matplotlib figure with our highlighted dates. By calling `pm.display()`, papermill
# will store the figure to the key that we've specified (`highlight_dates_fig`). This will let us inspect the
# output later on.

# %%
fig, ax = plt.subplots()
# Full series: black ('k') at 50% opacity.
ax.plot(data.index, data['mydata'], c='k', alpha=.5)
# Highlighted range: red ('r') with a line width of 3.
ax.plot(data_highlight.index, data_highlight['mydata'], c='r', lw=3)