def yield_result(self, value, output_name='result'):
    if not self.solid_def:
        return pm.record(output_name, serialize_dm_object(value))
    if not self.solid_def.has_output(output_name):
        raise DagstermillError(
            'Solid {solid_name} does not have output named {output_name}'.format(
                solid_name=self.solid_def.name,
                output_name=output_name,
            )
        )
    output_def = self.solid_def.output_def_named(output_name)
    try:
        return pm.record(
            output_name,
            serialize_dm_object(output_def.dagster_type.evaluate_value(value)),
        )
    except DagsterEvaluateValueError as de:
        raise_from(
            DagstermillError(
                (
                    'Solid {solid_name} output {output_name} output_type {output_type} '
                    'failed type check on value {value}'
                ).format(
                    solid_name=self.solid_def.name,
                    output_name=output_name,
                    output_type=output_def.dagster_type.name,
                    value=repr(value),
                )
            ),
            de,
        )
def yield_result(self, value, output_name):
    if not self.solid_def.has_output(output_name):
        raise DagstermillError(
            'Solid {solid_name} does not have output named {output_name}'.format(
                solid_name=self.solid_def.name, output_name=output_name
            )
        )
    if not self.populated_by_papermill:
        return value
    runtime_type = self.solid_def.output_def_named(output_name).runtime_type
    out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
    pm.record(output_name, write_value(runtime_type, value, out_file))
def yield_result(self, context_or_serialized, value, output_name='result'):
    dm_context = self._get_cached_dagstermill_context(context_or_serialized)
    if isinstance(dm_context, InMemoryDagstermillContext):
        return value
    solid_def = dm_context.solid_def
    if not solid_def.has_output(output_name):
        raise DagstermillError(
            'Solid {solid_name} does not have output named {output_name}'.format(
                solid_name=solid_def.name, output_name=output_name
            )
        )
    runtime_type = solid_def.output_def_named(output_name).runtime_type
    out_file = os.path.join(dm_context.marshal_dir, 'output-{}'.format(output_name))
    pm.record(output_name, marshal_value(runtime_type, value, out_file))
def yield_result(self, value, output_name):
    if not self.populated_by_papermill:
        return value
    check.invariant(
        self.solid_def is not None,
        "If Dagstermill has been run by papermill, self.solid_def should not be None",
    )
    if not self.solid_def.has_output(output_name):
        raise DagstermillError(
            'Solid {solid_name} does not have output named {output_name}'.format(
                solid_name=self.solid_def.name, output_name=output_name
            )
        )
    runtime_type = self.solid_def.output_def_named(output_name).runtime_type
    out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
    pm.record(output_name, write_value(runtime_type, value, out_file))
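# A minimal usage sketch for the yield_result() variants above, assuming the
# module-level dagstermill API that delegates to a manager like these; the
# value and output name are illustrative.
import dagstermill as dm

# From a notebook cell executed by papermill: hand a value back to the
# surrounding Dagster pipeline under the solid's declared output name.
dm.yield_result(42, output_name='result')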
def save(self, name, value, display=False):
    if name not in self.out:
        raise ValueError('"%s" not in output schema!' % name)
    otype = self.out[name]['type']
    if otype == 'Image':
        if type(value) is str:  # a filename was passed; wrap it
            value = Image(value)
        if type(value) is Image:
            if display:
                idisplay(value)
            data, _metadata = IPython.core.formatters.format_display_data(value)
            pm.record(name, data)
            return
    if display:
        idisplay(value)
    if otype == 'Array' and type(value) is np.ndarray:
        sval = json.dumps(value, cls=plotly.utils.PlotlyJSONEncoder)
        pm.record(name, sval)
        return
    pm.record(name, value)
def yield_result(self, value, output_name):
    if not self.populated_by_papermill:
        return value
    if self.solid_def is None:
        if output_name not in self.output_name_type_dict:
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'.format(
                    solid_name=self.solid_def_name, output_name=output_name
                )
            )
        runtime_type_enum = self.output_name_type_dict[output_name]
        if runtime_type_enum == SerializableRuntimeType.SCALAR:
            pm.record(output_name, value)
        elif runtime_type_enum == SerializableRuntimeType.ANY and is_json_serializable(value):
            pm.record(output_name, value)
        elif runtime_type_enum == SerializableRuntimeType.PICKLE_SERIALIZABLE:
            out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
            serialize_to_file(
                MANAGER_FOR_NOTEBOOK_INSTANCE.context,
                PickleSerializationStrategy(),
                value,
                out_file,
            )
            pm.record(output_name, out_file)
        else:
            raise DagstermillError(
                'Output Definition for output {output_name} requires repo registration '
                'since it has a complex serialization format'.format(output_name=output_name)
            )
    else:
        if not self.solid_def.has_output(output_name):
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'.format(
                    solid_name=self.solid_def.name, output_name=output_name
                )
            )
        runtime_type = self.solid_def.output_def_named(output_name).runtime_type
        out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
        pm.record(output_name, write_value(runtime_type, value, out_file))
def save(name, value, display=False):
    if display:
        idisplay(value)
    if type(value) is np.ndarray:
        sval = json.dumps(value, cls=plotly.utils.PlotlyJSONEncoder)
        pm.record(name, sval)
        return
    if type(value) is Video or type(value) is Image:
        data, _metadata = IPython.core.formatters.format_display_data(value)
        pm.record(name, data)
        return
    pm.record(name, value)
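# A short usage sketch for the save() helpers above, assuming Image comes from
# IPython.display; the file path and values here are hypothetical.
import numpy as np
from IPython.display import Image

save('scores', np.array([0.1, 0.9]))                 # arrays are recorded as Plotly-encoded JSON
save('preview', Image('preview.png'), display=True)  # rich objects are recorded as display data
save('run_id', 'abc123')                             # plain scalars are recorded as-is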
download_deeprec_resources(mind_url, os.path.join(data_path, 'valid'), mind_dev_dataset)
if not os.path.exists(yaml_file):
    download_deeprec_resources(
        r'https://recodatasets.blob.core.windows.net/newsrec/',
        os.path.join(data_path, 'utils'),
        mind_utils,
    )

## Create hyper-parameters
hparams = prepare_hparams(
    yaml_file,
    wordEmb_file=wordEmb_file,
    wordDict_file=wordDict_file,
    userDict_file=userDict_file,
    epochs=epochs,
)
print(hparams)

iterator = MINDIterator

## Train the NPA model
model = NPAModel(hparams, iterator, seed=seed)
print(model.run_eval(valid_news_file, valid_behaviors_file))
model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file)
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
print(res_syn)
pm.record("res_syn", res_syn)

## Save the model
model_path = os.path.join(data_path, "model")
os.makedirs(model_path, exist_ok=True)
model.model.save_weights(os.path.join(model_path, "npa_ckpt"))
if not os.path.exists(yaml_file):
    download_deeprec_resources(
        r'https://recodatasets.blob.core.windows.net/deeprec/',
        data_path1,
        'mind-demo-dkn.zip',
    )

## Create hyper-parameters
epochs = 10
history_size = 50
batch_size = 100
hparams = prepare_hparams(
    yaml_file,
    news_feature_file=news_feature_file,
    user_history_file=user_history_file,
    wordEmb_file=wordEmb_file,
    entityEmb_file=entityEmb_file,
    contextEmb_file=contextEmb_file,
    epochs=epochs,
    history_size=history_size,
    batch_size=batch_size,
)
print(hparams)

## Train the DKN model
model = DKN(hparams, DKNTextIterator)
print(model.run_eval(valid_file))
model.fit(train_file, valid_file)

## Evaluate the DKN model
res = model.run_eval(test_file)
print(res)
pm.record("res", res)
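# A hedged sketch of reading a recorded metric back out of an executed notebook,
# using the legacy papermill (<1.0) read API that pairs with pm.record(); the
# output notebook path is hypothetical.
import papermill as pm

nb = pm.read_notebook('dkn_output.ipynb')
print(nb.data['res'])  # the evaluation results recorded above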
# Here we use the **start_date** and **stop_date** parameters, which are defined above by default, but can
# be overwritten at runtime by papermill.

# %%
data_highlight = data.loc[start_date:stop_date]

# %% [markdown]
# We use the `pm.record()` function to keep track of how many records were included in the
# highlighted section. This lets us inspect this value after running the notebook with papermill.
#
# We also raise a ValueError if we've got a bug in the start/stop times, which will be captured
# and displayed by papermill if it's triggered.

# %%
num_records = len(data_highlight)
pm.record('num_records', num_records)
if num_records == 0:
    raise ValueError(
        "I have no data to highlight! Check that your dates are correct!")

# %% [markdown]
# ## Make our plot
#
# Below we'll generate a matplotlib figure with our highlighted dates. By calling `pm.display()`, papermill
# will store the figure to the key that we've specified (`highlight_dates_fig`). This will let us inspect the
# output later on.

# %%
fig, ax = plt.subplots()
ax.plot(data.index, data['mydata'], c='k', alpha=.5)
ax.plot(data_highlight.index, data_highlight['mydata'], c='r', lw=3)
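# %% [markdown]
# A minimal sketch completing the display step described above, assuming the
# legacy papermill (<1.0) `pm.display(name, obj)` API; the key matches the one
# named in the text.

# %%
pm.display('highlight_dates_fig', fig)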