예제 #1
0
    def yield_event(self, dagster_event):
        """Hand a Dagster event from notebook code back to the pipeline run.

        Outside of a pipeline run (interactive or development use) the event is
        returned to the caller untouched. Inside a pipeline run the event is
        pickled to a uniquely named file under ``self.marshal_dir`` and that
        file's path is glued to the notebook via scrapbook.

        Args:
            dagster_event (Union[:class:`dagster.AssetMaterialization`, :class:`dagster.ExpectationResult`, :class:`dagster.TypeCheck`, :class:`dagster.Failure`, :class:`dagster.RetryRequested`]):
                The event to send back to Dagster.

        Raises:
            DagstermillError: If ``dagster_event`` is not an instance of one of
                the accepted event types.
        """
        valid_types = (
            Materialization,
            AssetMaterialization,
            ExpectationResult,
            TypeCheck,
            Failure,
            RetryRequested,
        )
        if not isinstance(dagster_event, valid_types):
            message = f"Received invalid type {dagster_event} in yield_event. Expected a Dagster event type, one of {valid_types}."
            raise DagstermillError(message)

        if self.in_pipeline:
            # deferred import for perf
            import scrapbook

            # A fresh uuid4 keeps every yielded event under its own key/file.
            event_key = "event-{event_uuid}".format(event_uuid=str(uuid.uuid4()))
            pickle_path = os.path.join(self.marshal_dir, event_key)
            with open(pickle_path, "wb") as handle:
                payload = pickle.dumps(dagster_event, PICKLE_PROTOCOL)
                handle.write(payload)

            # Only the path is glued; the event itself lives in the pickle file.
            scrapbook.glue(event_key, pickle_path)
            return None

        return dagster_event
예제 #2
0
파일: manager.py 프로젝트: xhochy/dagster
    def yield_event(self, dagster_event):
        '''Send a dagster event from notebook code back to the pipeline run.

        Outside of a pipeline run (interactive or development use) the event is
        returned to the caller untouched. Inside a pipeline run the event is
        pickled to a uniquely named file under ``self.marshal_dir`` and that
        file's path is glued to the notebook via scrapbook.

        Args:
            dagster_event (Union[:class:`dagster.Materialization`, :class:`dagster.ExpectationResult`, :class:`dagster.TypeCheck`, :class:`dagster.Failure`]):
                The event to send back to Dagster.
        '''
        accepted_types = (Materialization, ExpectationResult, TypeCheck, Failure)
        check.inst_param(dagster_event, 'dagster_event', accepted_types)

        if self.in_pipeline:
            # deferred import for perf
            import scrapbook

            # A fresh uuid4 keeps every yielded event under its own key/file.
            event_key = 'event-{event_uuid}'.format(event_uuid=str(uuid.uuid4()))
            pickle_path = os.path.join(self.marshal_dir, event_key)
            with open(pickle_path, 'wb') as handle:
                payload = pickle.dumps(dagster_event, PICKLE_PROTOCOL)
                handle.write(payload)

            # Only the path is glued; the event itself lives in the pickle file.
            scrapbook.glue(event_key, pickle_path)
            return None

        return dagster_event
예제 #3
0
    def yield_result(self, value, output_name="result"):
        """Yield a result directly from notebook code.

        When called interactively or in development (``self.in_pipeline`` is
        falsy), returns its input unchanged. Otherwise the value is handed to
        ``write_value`` together with the output's dagster type and a target
        file under ``self.marshal_dir``, and whatever ``write_value`` returns
        is glued to the notebook under ``output_name``.

        Args:
            value (Any): The value to yield.
            output_name (Optional[str]): The name of the result to yield (default: ``'result'``).

        Raises:
            DagstermillError: If the solid has no output named ``output_name``.
        """
        if not self.in_pipeline:
            return value

        # deferred import for perf
        import scrapbook

        if not self.solid_def.has_output(output_name):
            expected_names = [str(output_def.name) for output_def in self.solid_def.output_defs]
            # The two literals are implicitly concatenated, so the first one
            # must end with a space to keep the sentences apart.
            raise DagstermillError(
                f"Solid {self.solid_def.name} does not have output named {output_name}. "
                f"Expected one of {expected_names}"
            )

        dagster_type = self.solid_def.output_def_named(output_name).dagster_type

        out_file = os.path.join(self.marshal_dir, f"output-{output_name}")
        scrapbook.glue(output_name, write_value(dagster_type, value, out_file))
예제 #4
0
    def yield_result(self, value, output_name="result"):
        """Yield a result directly from notebook code.

        When called interactively or in development (``self.in_pipeline`` is
        falsy), returns its input unchanged. Otherwise the value is passed to
        the step's IO manager via ``handle_output`` and an empty string is
        glued to the notebook under ``output_name`` to record that the output
        was yielded.

        Args:
            value (Any): The value to yield.
            output_name (Optional[str]): The name of the result to yield (default: ``'result'``).

        Raises:
            DagstermillError: If the solid has no output named ``output_name``.
        """
        if not self.in_pipeline:
            return value

        # deferred import for perf
        import scrapbook

        if not self.solid_def.has_output(output_name):
            expected_names = [str(output_def.name) for output_def in self.solid_def.output_defs]
            # The two literals are implicitly concatenated, so the first one
            # must end with a space to keep the sentences apart.
            raise DagstermillError(
                f"Solid {self.solid_def.name} does not have output named {output_name}. "
                f"Expected one of {expected_names}"
            )

        # pass output value cross process boundary using io manager
        step_context = self.context._step_context  # pylint: disable=protected-access
        # Note: yield_result currently does not support DynamicOutput
        step_output_handle = StepOutputHandle(
            step_key=step_context.step.key, output_name=output_name
        )
        output_context = step_context.get_output_context(step_output_handle)
        io_manager = step_context.get_io_manager(step_output_handle)

        # Note that we assume io manager is symmetric, i.e handle_input(handle_output(X)) == X
        io_manager.handle_output(output_context, value)

        # record that the output has been yielded
        scrapbook.glue(output_name, "")
예제 #5
0
    def yield_result(self, value, output_name="result"):
        """Yield a result directly from notebook code.

        When called interactively or in development (``self.in_pipeline`` is
        falsy), returns its input unchanged. Otherwise the value is handed to
        ``write_value`` together with the output's dagster type and a target
        file under ``self.marshal_dir``, and whatever ``write_value`` returns
        is glued to the notebook under ``output_name``.

        Args:
            value (Any): The value to yield.
            output_name (Optional[str]): The name of the result to yield (default: ``'result'``).

        Raises:
            DagstermillError: If the solid has no output named ``output_name``.
        """
        if not self.in_pipeline:
            return value

        # deferred import for perf
        import scrapbook

        if not self.solid_def.has_output(output_name):
            expected_names = [str(output_def.name) for output_def in self.solid_def.output_defs]
            # The two literals are implicitly concatenated, so the first one
            # must end with a space to keep the sentences apart.
            raise DagstermillError(
                f"Solid {self.solid_def.name} does not have output named {output_name}. "
                f"Expected one of {expected_names}"
            )

        dagster_type = self.solid_def.output_def_named(output_name).dagster_type

        # https://github.com/dagster-io/dagster/issues/2648
        # dagstermill temporary file creation should use a more systematic and robust scheme
        # The solid handle is part of the file name, so the path differs per handle.
        out_file = os.path.join(
            self.marshal_dir, f"{self.context.solid_handle}-output-{output_name}"
        )

        scrapbook.glue(output_name, write_value(dagster_type, value, out_file))
예제 #6
0
파일: manager.py 프로젝트: xhochy/dagster
    def yield_result(self, value, output_name='result'):
        '''Emit a solid output value from notebook code.

        Outside of a pipeline run (interactive or development use) the value is
        simply returned. Inside a pipeline run the value is handed to
        ``write_value`` with the output's runtime type and a target file under
        ``self.marshal_dir``; whatever ``write_value`` returns is glued to the
        notebook under ``output_name``.

        Args:
            value (Any): The value to yield.
            output_name (Optional[str]): The name of the result to yield (default: ``'result'``).

        Raises:
            DagstermillError: If the solid has no output named ``output_name``.
        '''
        if self.in_pipeline:
            # deferred import for perf
            import scrapbook

            if not self.solid_def.has_output(output_name):
                template = 'Solid {solid_name} does not have output named {output_name}'
                raise DagstermillError(
                    template.format(solid_name=self.solid_def.name, output_name=output_name)
                )

            output_def = self.solid_def.output_def_named(output_name)
            runtime_type = output_def.runtime_type

            file_name = 'output-{}'.format(output_name)
            out_file = os.path.join(self.marshal_dir, file_name)
            written = write_value(runtime_type, value, out_file)
            scrapbook.glue(output_name, written)
            return None

        return value
예제 #7
0
    def yield_event(self, dagster_event):
        '''Send an event from notebook code back to the caller or to papermill.

        When the manager was not populated by papermill the event is returned
        untouched. Otherwise the event is pickled to a uniquely named file
        under ``self.marshal_dir`` and that file's path is glued to the
        notebook via scrapbook.

        Args:
            dagster_event: The event object to yield.
        '''
        if self.populated_by_papermill:
            # A fresh uuid4 keeps every yielded event under its own key/file.
            event_key = 'event-{event_uuid}'.format(event_uuid=str(uuid.uuid4()))
            pickle_path = os.path.join(self.marshal_dir, event_key)
            with open(pickle_path, 'wb') as handle:
                payload = pickle.dumps(dagster_event, PICKLE_PROTOCOL)
                handle.write(payload)

            # Only the path is glued; the event itself lives in the pickle file.
            scrapbook.glue(event_key, pickle_path)
            return None

        return dagster_event
예제 #8
0
    def yield_materialization(self, path, description):
        '''Build a ``Materialization`` and return it or hand it to papermill.

        When the manager was not populated by papermill, a new
        ``Materialization(path, description)`` is returned. Otherwise a new
        ``Materialization`` is pickled to a uniquely named file under
        ``self.marshal_dir`` and that file's path is glued to the notebook via
        scrapbook.

        Args:
            path: First argument forwarded to ``Materialization``.
            description: Second argument forwarded to ``Materialization``.
        '''
        if self.populated_by_papermill:
            # A fresh uuid4 keeps every materialization under its own key/file.
            glue_key = 'materialization-{materialization_uuid}'.format(
                materialization_uuid=str(uuid.uuid4()))
            pickle_path = os.path.join(self.marshal_dir, glue_key)
            with open(pickle_path, 'wb') as handle:
                payload = pickle.dumps(Materialization(path, description), PICKLE_PROTOCOL)
                handle.write(payload)

            # Only the path is glued; the object itself lives in the pickle file.
            scrapbook.glue(glue_key, pickle_path)
            return None

        return Materialization(path, description)
예제 #9
0
파일: manager.py 프로젝트: cuulee/dagster
    def yield_event(self, dagster_event):
        '''Send an event from notebook code back to the pipeline run.

        Outside of a pipeline run the event is returned untouched. Inside a
        pipeline run the event is pickled to a uniquely named file under
        ``self.marshal_dir`` and that file's path is glued to the notebook via
        scrapbook.

        Args:
            dagster_event: The event object to yield.
        '''
        if self.in_pipeline:
            # deferred import for perf
            import scrapbook

            # A fresh uuid4 keeps every yielded event under its own key/file.
            event_key = 'event-{event_uuid}'.format(event_uuid=str(uuid.uuid4()))
            pickle_path = os.path.join(self.marshal_dir, event_key)
            with open(pickle_path, 'wb') as handle:
                payload = pickle.dumps(dagster_event, PICKLE_PROTOCOL)
                handle.write(payload)

            # Only the path is glued; the event itself lives in the pickle file.
            scrapbook.glue(event_key, pickle_path)
            return None

        return dagster_event
예제 #10
0
파일: manager.py 프로젝트: cuulee/dagster
    def yield_result(self, value, output_name='result'):
        '''Emit a solid output value from notebook code.

        Outside of a pipeline run the value is simply returned. Inside a
        pipeline run the value is handed to ``write_value`` with the output's
        runtime type and a target file under ``self.marshal_dir``; whatever
        ``write_value`` returns is glued to the notebook under ``output_name``.

        Args:
            value: The value to yield.
            output_name: Name of the solid output to yield to (default ``'result'``).

        Raises:
            DagstermillError: If the solid has no output named ``output_name``.
        '''
        if self.in_pipeline:
            # deferred import for perf
            import scrapbook

            if not self.solid_def.has_output(output_name):
                template = 'Solid {solid_name} does not have output named {output_name}'
                raise DagstermillError(
                    template.format(solid_name=self.solid_def.name, output_name=output_name)
                )

            output_def = self.solid_def.output_def_named(output_name)
            runtime_type = output_def.runtime_type

            file_name = 'output-{}'.format(output_name)
            out_file = os.path.join(self.marshal_dir, file_name)
            written = write_value(runtime_type, value, out_file)
            scrapbook.glue(output_name, written)
            return None

        return value
예제 #11
0
    def yield_result(self, value, output_name='result'):
        '''Emit a solid output value from notebook code.

        When the manager was not populated by papermill the value is simply
        returned. Otherwise the behavior depends on whether a solid definition
        is available:

        * With ``self.solid_def`` set, the value is handed to ``write_value``
          with the output's runtime type and a file under ``self.marshal_dir``;
          whatever ``write_value`` returns is glued under ``output_name``.
        * Without it, ``self.output_name_type_dict`` decides the handling:
          scalar values and JSON-serializable "any" values are glued directly,
          pickle-serializable values are pickled to a file whose path is glued,
          and anything else is rejected.

        Args:
            value: The value to yield.
            output_name: Name of the solid output to yield to (default ``'result'``).

        Raises:
            DagstermillError: If the output name is unknown, or if no solid
                definition is available and the output's type is not one of
                the directly supported serialization kinds.
        '''
        if not self.populated_by_papermill:
            return value

        if self.solid_def is not None:
            # A solid definition is available: delegate to write_value.
            if not self.solid_def.has_output(output_name):
                template = 'Solid {solid_name} does not have output named {output_name}'
                raise DagstermillError(
                    template.format(solid_name=self.solid_def.name, output_name=output_name)
                )

            output_def = self.solid_def.output_def_named(output_name)
            runtime_type = output_def.runtime_type

            out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
            scrapbook.glue(output_name, write_value(runtime_type, value, out_file))
            return None

        # No solid definition: fall back to the name -> type-enum mapping.
        if output_name not in self.output_name_type_dict:
            template = 'Solid {solid_name} does not have output named {output_name}'
            raise DagstermillError(
                template.format(solid_name=self.solid_def_name, output_name=output_name)
            )

        type_tag = self.output_name_type_dict[output_name]

        # `or` short-circuits, so is_json_serializable only runs for ANY outputs.
        glue_directly = type_tag == SerializableRuntimeType.SCALAR or (
            type_tag == SerializableRuntimeType.ANY and is_json_serializable(value)
        )
        if glue_directly:
            scrapbook.glue(output_name, value)
        elif type_tag == SerializableRuntimeType.PICKLE_SERIALIZABLE:
            out_file = os.path.join(self.marshal_dir, 'output-{}'.format(output_name))
            PickleSerializationStrategy().serialize_to_file(value, out_file)
            scrapbook.glue(output_name, out_file)
        else:
            # Discuss this in the docs and improve error message
            # https://github.com/dagster-io/dagster/issues/1275
            # https://github.com/dagster-io/dagster/issues/1276
            raise DagstermillError(
                'Output Definition for output {output_name} requires repo registration '
                'since it has a complex serialization format'.format(output_name=output_name)
            )
예제 #12
0
# %%
# Render the diagnostic plots of the fitted model held in `results`.
results.plot_diagnostics(figsize=(16, 8))
plt.show()

# %%
# Predict from `run_datetime - timedelta(1)` onward (non-dynamic) and fetch the
# matching confidence intervals.
# NOTE(review): presumably `timedelta` is datetime.timedelta, i.e. 1 day - confirm.
pred = results.get_prediction(start=(run_datetime - timedelta(1)),
                              dynamic=False)
pred_ci = pred.conf_int()

fig, ax = plt.subplots()
# Observed series, restricted to rows after `run_datetime - timedelta(3)`.
ax.plot(data[data.index > (run_datetime - timedelta(3))]['mydata'],
        label='observed')
ax.plot(pred.predicted_mean, label='One-step ahead Forecast', alpha=.7)
# Shade the band between the first and second columns of the interval frame.
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1],
                color='k',
                alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('mydata')
# The title is filled from best[1], best[2], the rounded best[0] and run_date.
ax.set(title='Results of ARIMA{}x{}12 - AIC:{} on {}'.format(
    best[1], best[2], round(best[0]), run_date))
fig.legend()
# Attach the figure to the notebook under the key 'arima_results_fig'.
# NOTE(review): `sb` is presumably the scrapbook module - confirm against imports.
sb.glue('arima_results_fig', fig, display=True)

# %%
# Save the prediction object to a path built from run_date and source_id.
pred.save("../data/output/step2/prediction_model_" + run_date + "-" +
          source_id)

# %%
예제 #13
0
# %%
# Build a timestamped file name (year-month-day_hour_minute) for the Excel export.
date_hour_stamp = time.strftime('%Y-%m-%d_%H_%M')
file_name = 'All_Plants_Before_After_Budgeted_CPUs_' + date_hour_stamp + '.xlsx'

# %% [markdown]
# ### Write/save the file to the designated network share drive location:

# %%
# Write the merged frame without its index column.
cpu_before_after_merge.to_excel(save_dir / file_name, index=False)

# %% [markdown]
# ### Now "glue" the path of the saved file to this notebook so that another notebook can retrieve/reference it:

# %%
# Bare expression: shows the full output path as the cell's result.
str(save_dir / file_name)

# %%
# Record the path under the key "path_to_red_green_sheet_excel_file".
# NOTE(review): `sb` is presumably the scrapbook module - confirm against imports.
sb.glue("path_to_red_green_sheet_excel_file", str(save_dir / file_name))

# %% [markdown]
# ### Send a Windows Toast notification when the script completes

# %%
# NOTE(review): the message below spells "Successfuly" (sic); it is a
# user-facing runtime string, so it is left untouched here.
toaster = ToastNotifier()
toaster.show_toast(
    "### Before vs After CPU Status ###",
    "Successfuly compared before CPUs with after CPU adjustments",
    icon_path="images/honda_logo.ico",
    duration=5)
예제 #14
0
# Here we use the **start_date** and **stop_date** parameters, which are defined above by default, but can
# be overwritten at runtime by papermill.

# %%
# Label-based slice of `data` between the two parameters.
data_highlight = data.loc[start_date:stop_date]

# %% [markdown]
# We use the `sb.glue()` function to keep track of how many records were included in the
# highlighted section. This lets us inspect this value after running the notebook with papermill.
#
# We also raise a ValueError if we've got a bug in the start/stop times, which will be captured
# and displayed by papermill if it's triggered.

# %%
num_records = len(data_highlight)
# Record the count under the key 'num_records'.
sb.glue('num_records', num_records, display=True)
# An empty selection is treated as an error, not as a valid result.
if num_records == 0:
    raise ValueError(
        "I have no data to highlight! Check that your dates are correct!")

# %% [markdown]
# ## Make our plot
#
# Below we'll generate a matplotlib figure with our highlighted dates. By calling `pm.display()`, papermill
# will store the figure to the key that we've specified (`highlight_dates_fig`). This will let us inspect the
# output later on.

# %%
# NOTE(review): the text above mentions `pm.display()`, but the call that stores
# the figure is not visible in this fragment, and the earlier cell records values
# with `sb.glue` - confirm which API is actually used.
fig, ax = plt.subplots()
# Full series in translucent black, highlighted range on top in thick red.
ax.plot(data.index, data['mydata'], c='k', alpha=.5)
ax.plot(data_highlight.index, data_highlight['mydata'], c='r', lw=3)
예제 #15
0
# Build one column of random normal values, one row per entry of `td`.
# NOTE(review): `td` is defined outside this fragment; it is used below as the
# 'date' column, so it presumably holds datetimes - confirm.
data = pd.DataFrame(np.random.randn(len(td)), columns=['mydata'])
# Centered rolling mean over a window of 70 rows (at least 1 row required).
data = data.rolling(70, min_periods=1,
                    center=True).mean()  # Smooth it so it looks purdy
data['date'] = td
# Two-digit hour string ("%H") derived from each date.
data['hour'] = data['date'].apply(lambda x: datetime.strftime(x, "%H"))

# %%
# Summary statistics for the date column, then for the whole frame.
print(data['date'].describe())
data.describe()

# %%
# Order rows by date and make the date the index (dropping the column).
data = data.sort_values('date').set_index('date', drop=True)
data.head(5)

# %%
fig, ax = plt.subplots()
# Full timestamp tick labels, auto-formatted by autofmt_xdate.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
plt.gcf().autofmt_xdate()
ax.plot(data.index, data['mydata'], c='k', alpha=.5)
ax.set(title="Activity for the day of {}".format(run_date))
# Attach the figure to the notebook under the key 'activity_day_fig'.
# NOTE(review): `sb` is presumably the scrapbook module - confirm against imports.
sb.glue('activity_day_fig', fig, display=True)

# %%
# Output path layout: <step1 dir>/<YYYY-MM>/<run_date>-<source_id>.csv
month_partition = run_datetime.strftime("%Y-%m")
output_file = "../data/output/step1/" + month_partition + "/" + run_date + '-' + source_id + '.csv'
print(output_file)

# %%
# Create the month directory if needed, then write the frame as CSV.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
data.to_csv(output_file)
예제 #16
0
                          show_step=10)
logger.debug(f"hparams: {hparams}")

iterator = MINDIterator

# Build the model from the hyper-parameters, the iterator class and the seed.
model = NRMSModel(hparams, iterator, seed=seed)

# Log the evaluation result on the validation files before fitting.
logger.info(model.run_eval(valid_news_file, valid_behaviors_file))

model.fit(train_news_file, train_behaviors_file, valid_news_file,
          valid_behaviors_file)

# Evaluate again after fitting and keep the result.
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
logger.debug(f"res_syn: {res_syn}")

# Record the evaluation result under the key "res_syn".
# NOTE(review): `sb` is presumably the scrapbook module - confirm against imports.
sb.glue("res_syn", res_syn)

# Save the model weights under BASE_DIR/ckpt, creating the directory if needed.
model_path = os.path.join(BASE_DIR, "ckpt")
os.makedirs(model_path, exist_ok=True)

model.model.save_weights(os.path.join(model_path, "nrms_ckpt"))

group_impr_indexes, group_labels, group_preds = model.run_fast_eval(
    test_news_file, test_behaviors_file)

# Write one line per impression: "<index> [rank,rank,...]".
# NOTE(review): the 'submits' directory is not created here; open() will fail
# if it does not already exist - confirm it is created elsewhere.
with open(os.path.join(BASE_DIR, 'submits', 'prediction.txt'), 'w') as f:
    for impr_index, preds in tqdm(zip(group_impr_indexes, group_preds)):
        # Shift the index by one (presumably to make it 1-based - confirm).
        impr_index += 1
        # Double argsort on the descending order turns scores into 1-based
        # ranks, where rank 1 is the highest score.
        pred_rank = (np.argsort(np.argsort(preds)[::-1]) + 1).tolist()
        pred_rank = '[' + ','.join([str(i) for i in pred_rank]) + ']'
        f.write(' '.join([str(impr_index), pred_rank]) + '\n')