Example #1
    def get_node_path(self, node):
        assert len(node.parents) > 0, "Got node with no parents"
        if node.parents[0] == self.root.gid:
            return []
        try:
            parent = Gphoto.objects(gid=node.parents[0]).get()
        except me.MultipleObjectsReturned as e:
            logger.warning(
                f"Wrong number of records returned for {node.gid}. Error {e}"
            )
            return ["*MultiParents*"]
        except me.DoesNotExist as e:
            logger.warning(f"Parent does not exist. Error {e}")
            return []
        if parent.path:
            return parent.path + [parent.name]
        else:
            return self.get_node_path(parent) + [parent.name]
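    # Usage sketch (assumed context, not part of the original source): `node` is a
    # Gphoto document whose `parents` field holds parent folder ids, and the method
    # returns the chain of ancestor folder names, e.g.
    #   path = self.get_node_path(node)   # hypothetically -> ['Photos', '2021']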
Example #2
def train_a2c(
    base_agent,
    num_epochs,
    checkpoint_path=None,
    players_per_game=4,
    lr_schedule=None,
    entropy_schedule=None,
    td_lambda_schedule=None,
    pretraining=False,
):
    """Train advantage actor-critic (A2C) agent through self-play"""
    objective = base_agent._objective

    default_schedules = get_default_schedules(pretraining=pretraining)

    if lr_schedule is None:
        lr_schedule = default_schedules["learning_rate"]

    if entropy_schedule is None:
        entropy_schedule = default_schedules["entropy"]

    if td_lambda_schedule is None:
        td_lambda_schedule = default_schedules["td_lambda"]

    optimizer = optax.MultiSteps(
        optax.chain(optax.adam(learning_rate=1), optax.scale_by_schedule(lr_schedule)),
        players_per_game,
    )
    opt_state = optimizer.init(base_agent.get_weights())

    running_stats = defaultdict(lambda: deque(maxlen=1000))
    progress = tqdm.tqdm(range(num_epochs), dynamic_ncols=True)

    loss_type = "supervised" if pretraining else "a2c"
    loss_fn = compile_loss_function(loss_type, base_agent._network)
    sgd_step = compile_sgd_step(loss_fn, optimizer)

    best_score = -float("inf")

    if pretraining:
        greedy_agent = base_agent.clone()
        greedy_agent._be_greedy = True
        agents = [greedy_agent] * players_per_game
    else:
        agents = [base_agent] * players_per_game

    for i in progress:
        scores, trajectories = play_tournament(agents, record_trajectories=True)

        final_scores = [s.total_score() for s in scores]
        winner = np.argmax(final_scores)
        logger.info(
            "Player {} won with a score of {} (median {})",
            winner,
            final_scores[winner],
            np.median(final_scores),
        )
        logger.info(
            " Winning scorecard:\n{}",
            print_score(scores[winner]),
        )

        weights = base_agent._weights
        loss_kwargs = dict(
            entropy_cost=entropy_schedule(i), td_lambda=td_lambda_schedule(i)
        )

        for p in range(players_per_game):
            observations, actions, rewards = zip(*trajectories[p])
            assert sum(rewards) == scores[p].total_score()

            observations = np.stack(observations, axis=0)
            actions = np.array(actions, dtype=np.int32)
            rewards = np.array(rewards, dtype=np.float32) / REWARD_NORM

            logger.debug(" observations {}: {}", p, observations)
            logger.debug(" actions {}: {}", p, actions)
            logger.debug(" rewards {}: {}", p, rewards)

            if objective == "win" and p == winner:
                rewards[-1] += WINNING_REWARD / REWARD_NORM

            weights, opt_state = sgd_step(
                weights, opt_state, observations, actions, rewards, **loss_kwargs
            )

            loss_components = loss_fn(
                weights, observations, actions, rewards, **loss_kwargs
            )
            loss_components = [float(k) for k in loss_components]

            epoch_stats = dict(
                actor_loss=loss_components[0],
                critic_loss=loss_components[1],
                entropy_loss=loss_components[2],
                loss=sum(loss_components),
                score=scores[p].total_score(),
            )
            for key, val in epoch_stats.items():
                buf = running_stats[key]
                if len(buf) == buf.maxlen:
                    buf.popleft()
                buf.append(val)

        base_agent.set_weights(weights)

        if pretraining:
            greedy_agent.set_weights(weights)

        if i % 10 == 0:
            avg_score = np.mean(running_stats["score"])
            if avg_score > best_score + 1 and i > running_stats["score"].maxlen:
                best_score = avg_score

                if checkpoint_path is not None:
                    logger.warning(
                        " Saving checkpoint for average score {:.2f}", avg_score
                    )
                    base_agent.save(checkpoint_path)

            progress.set_postfix(
                {key: np.mean(val) for key, val in running_stats.items()}
            )

    return base_agent
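# Minimal invocation sketch (assumed agent object, hypothetical checkpoint path);
# the agent must expose the interface used above (get_weights/set_weights/clone,
# _objective, _network, _weights):
#   trained = train_a2c(agent, num_epochs=5000, checkpoint_path="ckpt/a2c", players_per_game=4)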
Example #3
    def __run_task_phase(self, phase):
        """Executes task phase, ie. call all enabled plugins on the task.

        Fires events:

        * task.execute.before_plugin
        * task.execute.after_plugin

        :param string phase: Name of the phase
        """
        if phase not in phase_methods:
            raise Exception('%s is not a valid task phase' % phase)
        # warn if no inputs, filters or outputs in the task
        if phase in ['input', 'filter', 'output']:
            if not self.manager.unit_test:
                # Check that there is at least one manually configured plugin for these phases
                for p in self.plugins(phase):
                    if not p.builtin:
                        break
                else:
                    if phase not in self.suppress_warnings:
                        if phase == 'filter':
                            logger.warning(
                                'Task does not have any filter plugins to accept entries. '
                                'You need at least one to accept the entries you want.'
                            )
                        else:
                            logger.warning(
                                'Task doesn\'t have any {} plugins, you should add (at least) one!',
                                phase,
                            )

        for plugin in self.plugins(phase):
            # Abort this phase if one of the plugins disables it
            if phase in self.disabled_phases:
                return
            if plugin.name in self.disabled_plugins:
                continue
            # store execute info, except during entry events
            self.current_phase = phase
            self.current_plugin = plugin.name

            if plugin.api_ver == 1:
                # backwards compatibility
                # pass method only task (old behaviour)
                args = (self, )
            else:
                # pass method task, copy of config (so plugin cannot modify it)
                args = (self, copy.copy(self.config.get(plugin.name)))

            # Hack to make task.session only active for a single plugin
            with Session() as session:
                self.session = session
                try:
                    fire_event('task.execute.before_plugin', self, plugin.name)
                    response = self.__run_plugin(plugin, phase, args)
                    if phase == 'input' and response:
                        # add entries returned by input to self.all_entries
                        for e in response:
                            e.task = self
                            self.all_entries.append(e)
                finally:
                    fire_event('task.execute.after_plugin', self, plugin.name)
                self.session = None
        # check config hash for changes at the end of 'prepare' phase
        if phase == 'prepare':
            self.check_config_hash()
Example #4
    def _group_into_time_bins(self, ds_utime, ds_avg_intervals, ds_counts):
        # Produce row and time chunking strategies for each dataset
        ds_row_chunks = []
        ds_time_chunks = []
        ds_interval_secs = []

        it = zip(ds_utime, ds_avg_intervals, ds_counts)
        for di, (utime, avg_interval, counts) in enumerate(it):
            # Maintain row and time chunks for this dataset
            row_chunks = []
            time_chunks = []
            interval_secs = []

            # Start out with first entries
            bin_rows = counts[0]
            bin_times = 1
            bin_secs = avg_interval[0]

            dsit = enumerate(zip(utime[1:], avg_interval[1:], counts[1:]))
            for ti, (ut, avg_int, count) in dsit:
                if avg_int > self.time_bin_secs and self.time_bin_secs != -1.0:
                    logger.warning(
                        "The average INTERVAL associated with "
                        "unique time {:3f} in dataset {:d} "
                        "is {:3f} but this exceeds the requested "
                        "number of seconds in a time bin {:3f}s. "
                        "Consider increasing --time-bin-secs", ut, di, avg_int,
                        self.time_bin_secs)

                next_bin_secs = bin_secs + avg_int

                if next_bin_secs < self.time_bin_secs:
                    bin_secs = next_bin_secs
                    bin_rows += count
                    bin_times += 1
                # Otherwise finalize this bin and
                # start a new one with the counts
                # we were trying to add
                else:
                    row_chunks.append(bin_rows)
                    time_chunks.append(bin_times)
                    interval_secs.append(bin_secs)
                    bin_rows = count
                    bin_times = 1
                    bin_secs = avg_int

            # Finish any remaining bins
            if bin_rows > 0:
                assert bin_times > 0
                row_chunks.append(bin_rows)
                time_chunks.append(bin_times)
                interval_secs.append(bin_secs)

            row_chunks = tuple(row_chunks)
            time_chunks = tuple(time_chunks)
            interval_secs = tuple(interval_secs)
            ds_row_chunks.append(row_chunks)
            ds_time_chunks.append(time_chunks)
            ds_interval_secs.append(interval_secs)

        return ds_row_chunks, ds_time_chunks, ds_interval_secs
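    # Worked trace (hypothetical numbers, not from the original source): with
    # time_bin_secs=4.0, avg_interval=[1.0, 1.0, 1.0, 1.0] and counts=[10, 10, 10, 10],
    # the first three unique times accumulate (bin_secs 1.0 -> 2.0 -> 3.0); adding the
    # fourth would reach 4.0, which is not < 4.0, so that bin is finalised and a new one
    # started, giving row_chunks=(30, 10), time_chunks=(3, 1), interval_secs=(3.0, 1.0).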
Example #5
    def prepare_dir(self,
                    base_dir: str,
                    dir_name: str,
                    problem_list: List[str],
                    copy_dir=True):
        """prepare model checkpoint dir. this function will copy or save transformers' configs
        and tokenizers to params.ckpt_dir

        Args:
            base_dir (str): base_dir of params.ckpt_dir. same as os.path.dirname(params.ckpt_dir). bad naming
            dir_name (str): dir_name, same as os.path.basename(params.ckpt_dir). bad naming
            problem_list (List[str]): list of problem names, used to build the default dir_name
        """
        base = base_dir if base_dir is not None else 'models'

        dir_name = dir_name if dir_name is not None else '_'.join(
            problem_list) + '_ckpt'
        self.ckpt_dir = os.path.join(base, dir_name)

        # we need to make sure all configs, tokenizers are in ckpt_dir
        # configs
        from_config_path = os.path.join(self.init_checkpoint, 'bert_config')
        from_decoder_config_path = os.path.join(self.init_checkpoint,
                                                'bert_decoder_config')
        to_config_path = os.path.join(self.ckpt_dir, 'bert_config')
        to_decoder_config_path = os.path.join(self.ckpt_dir,
                                              'bert_decoder_config')

        # tokenizers
        from_tokenizer_path = os.path.join(self.init_checkpoint, 'tokenizer')
        to_tokenizer_path = os.path.join(self.ckpt_dir, 'tokenizer')

        from_decoder_tokenizer_path = os.path.join(self.init_checkpoint,
                                                   'decoder_tokenizer')
        to_decoder_tokenizer_path = os.path.join(self.ckpt_dir,
                                                 'decoder_tokenizer')

        self.params_path = os.path.join(self.ckpt_dir, 'params.json')

        if not self.predicting:
            create_path(self.ckpt_dir)

            # two ways to init model
            # 1. init from TF checkpoint dir created by m3tl.
            # 2. init from huggingface checkpoint.

            # bert config exists, init from existing config
            if os.path.exists(from_config_path):
                # copy config
                self._copy_or_wait(from_config_path,
                                   to_config_path,
                                   do_copy=copy_dir)
                self.bert_config = load_transformer_config(
                    to_config_path, self.transformer_config_loading)

                # copy tokenizer
                self._copy_or_wait(from_tokenizer_path,
                                   to_tokenizer_path,
                                   do_copy=copy_dir)

                # copy decoder config
                if os.path.exists(from_decoder_config_path):
                    self._copy_or_wait(from_decoder_config_path,
                                       to_decoder_config_path,
                                       do_copy=copy_dir)
                    self.bert_decoder_config = load_transformer_config(
                        from_decoder_config_path,
                        self.transformer_decoder_config_loading)
                    self.bert_decoder_config_dict = self.bert_decoder_config.to_dict(
                    )
                # copy decoder tokenizer
                if os.path.exists(from_decoder_tokenizer_path):
                    self._copy_or_wait(from_decoder_tokenizer_path,
                                       to_decoder_tokenizer_path,
                                       do_copy=copy_dir)

                self.init_weight_from_huggingface = False
            else:
                # load config from huggingface
                logger.warning(
                    '{} not exists. will load model from huggingface checkpoint.',
                    from_config_path)
                # get or download config
                self.init_weight_from_huggingface = True
                self.bert_config = load_transformer_config(
                    self.transformer_config_name,
                    self.transformer_config_loading)
                self.bert_config.save_pretrained(to_config_path)

                # save tokenizer
                tokenizer = load_transformer_tokenizer(
                    self.transformer_tokenizer_name,
                    self.transformer_tokenizer_loading)
                tokenizer.save_pretrained(to_tokenizer_path)
                # The tokenizer's save_pretrained method saves the config as tokenizer_config.json, which causes an
                # OSError if tokenizer.from_pretrained is used directly, so we need to manually rename the json file
                try:
                    os.rename(
                        os.path.join(to_tokenizer_path,
                                     'tokenizer_config.json'),
                        os.path.join(to_tokenizer_path, 'config.json'))
                except:
                    pass

                # if decoder is specified
                if self.transformer_decoder_model_name:
                    self.bert_decoder_config = load_transformer_config(
                        self.transformer_decoder_config_name,
                        self.transformer_decoder_config_loading)
                    self.bert_decoder_config_dict = self.bert_decoder_config.to_dict(
                    )
                    self.bert_decoder_config.save_pretrained(
                        to_decoder_config_path)
                    decoder_tokenizer = load_transformer_tokenizer(
                        self.transformer_decoder_tokenizer_name,
                        self.transformer_decoder_tokenizer_loading)
                    decoder_tokenizer.save_pretrained(
                        to_decoder_tokenizer_path)
                    try:
                        os.rename(
                            os.path.join(to_decoder_tokenizer_path,
                                         'tokenizer_config.json'),
                            os.path.join(to_decoder_tokenizer_path,
                                         'config.json'))
                    except:
                        pass
        else:
            self.bert_config = load_transformer_config(to_config_path)
            if os.path.exists(to_decoder_config_path):
                self.bert_decoder_config = load_transformer_config(
                    to_decoder_config_path)
            self.init_weight_from_huggingface = False

        self.transformer_config_name = to_config_path
        # set value if and only if decoder is assigned
        self.transformer_decoder_config_name = to_decoder_config_path if self.transformer_decoder_config_name is not None else None
        self.transformer_tokenizer_name = to_tokenizer_path
        # set value if and only if decoder is assigned
        self.transformer_decoder_tokenizer_name = to_decoder_tokenizer_path if self.transformer_decoder_tokenizer_name is not None else None

        self.bert_config_dict = self.bert_config.to_dict()

        tokenizer = load_transformer_tokenizer(
            self.transformer_tokenizer_name,
            self.transformer_tokenizer_loading)
        self.vocab_size = tokenizer.vocab_size
        if self.transformer_decoder_tokenizer_name:
            decoder_tokenizer = load_transformer_tokenizer(
                self.transformer_decoder_tokenizer_name,
                self.transformer_decoder_tokenizer_loading)

            # if set bos and eos
            if decoder_tokenizer.bos_token is None:
                decoder_tokenizer.add_special_tokens({'bos_token': BOS_TOKEN})

            if decoder_tokenizer.eos_token is None:
                decoder_tokenizer.add_special_tokens({'eos_token': EOS_TOKEN})

            # overwrite tokenizer
            decoder_tokenizer.save_pretrained(to_decoder_tokenizer_path)

            self.decoder_vocab_size = decoder_tokenizer.vocab_size
            self.bos_id = decoder_tokenizer.bos_token_id
            self.eos_id = decoder_tokenizer.eos_token_id
Example #6
columns_to_drop = [
    "Unnamed: 0", "ID", "Name", "Photo", "Nationality", "Flag", "Club",
    "Club Logo", "Value", "Wage", "Special", "Preferred Foot",
    "International Reputation", "Weak Foot", "Skill Moves", "Work Rate",
    "Body Type", "Real Face", "Position", "Jersey Number", "Joined",
    "Loaned From", "Contract Valid Until", "Height", "Weight", "LS", "ST",
    "RS", "LW", "LF", "CF", "RF", "RW", "LAM", "CAM", "RAM", "LM", "LCM", "CM",
    "RCM", "RM", "LWB", "LDM", "CDM", "RDM", "RWB", "LB", "LCB", "CB", "RCB",
    "RB", "Release Clause"
]

try:
    fifa.drop(columns_to_drop, axis=1, inplace=True)
except KeyError:
    logger.warning(f"Columns already dropped")

# ## Start your analysis from here

# In[4]:

# Check the first rows of the dataset
fifa.head()

# In[5]:

# Produce descriptive statistics for the dataset
fifa.describe().transpose()

# In[6]:
Example #7
        if args.bkg:
            ovbinfile = "{0}_prefilt_ovbin.fits".format(
                basename.split("_cleanfilt")[0])
            if path.isfile(ovbinfile):
                log.info(
                    "Reading overshoots file present...Getting from {}".format(
                        ovbinfile))
                ovbintable = Table.read(ovbinfile, hdu=1)
            else:
                ovbintable = None
        else:
            ovbintable = None

else:
    log.warning(
        "You have not specified any files, please input the path to the files you want to see. Exiting."
    )
    sys.exit()

# ---------------------Options for data filtering / Plotting -------------------
if (not args.sci and not args.eng and not args.map and not args.bkg
        and not args.interactive):
    log.warning("No specific plot requested, making all")
    args.sci = True
    args.eng = True
    args.map = True
    args.bkg = True

if args.filtall:
    args.filtswtrig = True
    args.filtovershoot = True
Example #8
def plot_bulk(plots,
              dirname,
              plot_images,
              metric,
              plot,
              baseline_count=3,
              add_legend=True,
              max_bpp=5,
              draw_markers=1):
    plot = helpers.utils.match_option(plot, ['fit', 'aggregate'])
    if dirname.endswith('/') or dirname.endswith('\\'):
        dirname = dirname[:-1]

    # Load data and select images for plotting
    df_all, labels = load_data(plots, dirname)
    plot_images = plot_images if len(
        plot_images) > 0 else [-1] + df_all[0].image_id.unique().tolist()
    logger.info(f'Selected images: {plot_images}')

    images_x = int(np.ceil(np.sqrt(len(plot_images))))
    images_y = int(np.ceil(len(plot_images) / images_x))

    update_ylim = False
    marker_legend = False

    # Plot setup
    func, fit_bounds = setup_fit(metric)
    y_min, y_max, metric_label = setup_plot(metric)

    # Setup drawing styles
    styles = [['r-', 'rx'], ['b--', 'b+'], ['k:', 'k2'], ['g-', 'gx'],
              ['m-', 'gx'], ['m--', 'gx'], ['m-.', 'gx'], ['m:', 'gx']]
    avg_markers = ['', '', '', 'o', 'o', '2', '+', 'X', '^', '.']

    # To retain consistent styles across plots, adjust the lists based on the number of baseline methods
    if baseline_count < 3:
        styles = styles[(3 - baseline_count):]
        avg_markers = avg_markers[(3 - baseline_count):]

    mse_labels = {}

    fig, ax = plt.subplots(images_y, images_x, sharex=True, sharey=True)
    fig.set_size_inches((images_x * 6, images_y * 4))

    if hasattr(ax, 'flat'):
        for axes in ax.flat:
            axes.axis('off')

    for ax_id, image_id in enumerate(plot_images):

        if images_y > 1:
            axes = ax[ax_id // images_x, ax_id % images_x]
        elif images_x > 1:
            axes = ax[ax_id % images_x]
        else:
            axes = ax

        axes.axis('on')

        # Select measurements for a specific image, if specified
        for dfc in df_all:
            if image_id >= 0:
                dfc['selected'] = dfc['image_id'].apply(
                    lambda x: x == image_id)
            else:
                dfc['selected'] = True

        for index, dfc in enumerate(df_all):

            x = dfc.loc[dfc['selected'], 'bpp'].values
            y = dfc.loc[dfc['selected'], metric].values

            X = np.linspace(max([0, x.min() * 0.9]), min([5, x.max() * 1.1]),
                            256)

            if plot == 'fit':
                # Fit individual images to a curve, then average the curves

                if image_id >= 0:
                    images = [image_id]
                else:
                    images = dfc.image_id.unique()

                Y = np.zeros((len(images), len(X)))
                mse_l = []

                for image_no, imid in enumerate(images):

                    x = dfc.loc[dfc['selected'] & (dfc['image_id'] == imid),
                                'bpp'].values
                    y = dfc.loc[dfc['selected'] & (dfc['image_id'] == imid),
                                metric].values

                    # Allow for larger errors for lower SSIM values
                    if metric in ['ssim', 'msssim']:
                        sigma = np.abs(1 - y).reshape((-1, ))
                    else:
                        sigma = np.ones_like(y).reshape((-1, ))

                    try:
                        popt, pcov = curve_fit(func,
                                               x,
                                               y,
                                               bounds=fit_bounds,
                                               sigma=sigma,
                                               maxfev=100000)
                        y_est = func(x, *popt)
                        mse = np.mean(np.power(y - y_est, 2))
                        mse_l.append(mse)
                        if mse > 0.1:
                            logger.warning(
                                'WARNING Large MSE for {} img=#{} = {:.2f}'.
                                format(labels[index], image_no, mse))

                    except RuntimeError as err:
                        logger.error(
                            f'{labels[index]} image ={imid} bpp={x} y ={y} err ={err}'
                        )

                    Y[image_no] = func(X, *popt)

                if image_id < 0:
                    logger.info(
                        'Fit summary - MSE for {} av={:.2f} max={:.2f}'.format(
                            labels[index], np.mean(mse_l), np.max(mse_l)))
                mse_labels[labels[index]] = np.mean(mse_l)

                yy = np.nanmean(Y, axis=0)
                axes.plot(X,
                          yy,
                          styles[index][0],
                          label='{} ({:.3f})'.format(labels[index],
                                                     mse_labels[labels[index]])
                          if add_legend else None)
                y_min = min([y_min, min(yy)]) if update_ylim else y_min

            elif plot == 'aggregate':
                # For each quality level (QF, #channels) find the average quality level
                dfa = dfc.loc[dfc['selected']]

                if 'n_features' in dfa:
                    dfg = dfa.groupby('n_features')
                else:
                    dfg = dfa.groupby('quality')

                x = dfg.mean()['bpp'].values
                y = dfg.mean()[metric].values

                axes.plot(x,
                          y,
                          styles[index][0],
                          label=labels[index] if add_legend else None,
                          marker=avg_markers[index],
                          alpha=0.65)
                y_min = min([y_min, min(y)]) if update_ylim else y_min

            elif plot == 'none':
                pass

            else:
                raise ValueError('Unsupported plot type!')

            if draw_markers > 0:

                if 'entropy_reg' in dfc:

                    if image_id >= 0 or draw_markers >= 2:

                        # No need to draw legend if multiple DCNs are plotted
                        detailed_legend = 'full' if marker_legend and index == baseline_count else False

                        style_mapping = {}

                        if 'n_features' in dfc and len(
                                dfc['n_features'].unique()) > 1:
                            style_mapping['hue'] = 'n_features'

                        if 'entropy_reg' in dfc and len(
                                dfc['entropy_reg'].unique()) > 1:
                            style_mapping['size'] = 'entropy_reg'

                        if 'quantization' in dfc and len(
                                dfc['quantization'].unique()) > 1:
                            style_mapping['style'] = 'quantization'

                        sns.scatterplot(data=dfc[dfc['selected']],
                                        x='bpp',
                                        y=metric,
                                        palette="Set2",
                                        ax=axes,
                                        legend=detailed_legend,
                                        **style_mapping)

                else:

                    if image_id >= 0:
                        axes.plot(x, y, styles[index][1], alpha=0.65)

        # Setup title
        n_images = len(dfc.loc[dfc['selected'], 'image_id'].unique())
        if n_images > 1:
            title = '{} for {} images ({})'.format(plot, n_images,
                                                   os.path.split(dirname)[-1])
        else:
            title = '\#{} : {}'.format(
                image_id, dfc.loc[dfc['selected'],
                                  'filename'].unique()[0].replace('.png', ''))

        # Fixes problems with rendering using the LaTeX backend
        if add_legend:
            for t in axes.legend().texts:
                t.set_text(t.get_text().replace('_', '-'))

        axes.set_xlim([-0.1, max_bpp + 0.1])
        axes.set_ylim([y_min * 0.95, y_max])
        axes.legend(loc='lower right')
        axes.set_title(title)
        if image_id // images_x == images_y - 1:
            axes.set_xlabel('Effective bpp')
        if image_id % images_x == 0:
            axes.set_ylabel(metric_label)

    return fig
Example #9
    def create_task_on_agent(self):
        from agents.models import Agent

        agent = (
            Agent.objects.filter(pk=self.agent.pk)
            .only("pk", "version", "hostname", "agent_id")
            .first()
        )

        if self.task_type == "scheduled":
            nats_data = {
                "func": "schedtask",
                "schedtaskpayload": {
                    "type": "rmm",
                    "trigger": "weekly",
                    "weekdays": self.run_time_bit_weekdays,
                    "pk": self.pk,
                    "name": self.win_task_name,
                    "hour": dt.datetime.strptime(self.run_time_minute, "%H:%M").hour,
                    "min": dt.datetime.strptime(self.run_time_minute, "%H:%M").minute,
                },
            }

        elif self.task_type == "runonce":
            # check if scheduled time is in the past
            agent_tz = pytz.timezone(agent.timezone)
            task_time_utc = self.run_time_date.replace(tzinfo=agent_tz).astimezone(
                pytz.utc
            )
            now = djangotime.now()
            if task_time_utc < now:
                self.run_time_date = now.astimezone(agent_tz).replace(
                    tzinfo=pytz.utc
                ) + djangotime.timedelta(minutes=5)
                self.save(update_fields=["run_time_date"])

            nats_data = {
                "func": "schedtask",
                "schedtaskpayload": {
                    "type": "rmm",
                    "trigger": "once",
                    "pk": self.pk,
                    "name": self.win_task_name,
                    "year": int(dt.datetime.strftime(self.run_time_date, "%Y")),
                    "month": dt.datetime.strftime(self.run_time_date, "%B"),
                    "day": int(dt.datetime.strftime(self.run_time_date, "%d")),
                    "hour": int(dt.datetime.strftime(self.run_time_date, "%H")),
                    "min": int(dt.datetime.strftime(self.run_time_date, "%M")),
                },
            }

            if self.run_asap_after_missed and pyver.parse(agent.version) >= pyver.parse(
                "1.4.7"
            ):
                nats_data["schedtaskpayload"]["run_asap_after_missed"] = True

            if self.remove_if_not_scheduled:
                nats_data["schedtaskpayload"]["deleteafter"] = True

        elif self.task_type == "checkfailure" or self.task_type == "manual":
            nats_data = {
                "func": "schedtask",
                "schedtaskpayload": {
                    "type": "rmm",
                    "trigger": "manual",
                    "pk": self.pk,
                    "name": self.win_task_name,
                },
            }
        else:
            return "error"

        r = asyncio.run(agent.nats_cmd(nats_data, timeout=5))

        if r != "ok":
            self.sync_status = "initial"
            self.save(update_fields=["sync_status"])
            logger.warning(
                f"Unable to create scheduled task {self.name} on {agent.hostname}. It will be created when the agent checks in."
            )
            return "timeout"
        else:
            self.sync_status = "synced"
            self.save(update_fields=["sync_status"])
            logger.info(f"{agent.hostname} task {self.name} was successfully created")

        return "ok"
Example #10
def master_problem(layer_pool, tlim=None, relaxation=True, enable_output=False):
    """
    Solve the master problem, either in its full version (MP)
    or in its relaxed version (RMP). Returns the following:
    - Objective value: minimization of sum(alpha[l] * h[l]), with h heights and l layer
    - Alpha values: alpha[l] represents layer selection
    - [RMP] Duals: one dual for each item
    """
    logger.info("RMP defining variables and constraints")

    # Solver
    if relaxation:
        slv = pywraplp.Solver("RMP", pywraplp.Solver.GLOP_LINEAR_PROGRAMMING)
    else:
        slv = pywraplp.Solver("MP", pywraplp.Solver.BOP_INTEGER_PROGRAMMING)

    # Enable verbose output from solver
    if enable_output:
        slv.EnableOutput()

    # Utility
    fsi, _, _ = layer_pool.superitems_pool.get_fsi()
    zsl = layer_pool.get_zsl()
    ol = layer_pool.get_ol()
    infinity = slv.infinity()
    n_layers = len(layer_pool)
    n_items = fsi.shape[-1]

    # Variables
    if relaxation:
        al = [slv.NumVar(0, infinity, f"alpha_{l}") for l in range(n_layers)]
    else:
        al = [slv.BoolVar(f"alpha_{l}") for l in range(n_layers)]

    # Constraints
    constraints = []
    coefficients = np.matmul(fsi.T, zsl)

    # Select each item at least once
    # sum(al[l] * zsl[s, l] * fsi[s, i])
    for i in range(n_items):
        c = slv.Constraint(1, infinity, f"c_{i}")
        for l in range(n_layers):
            if coefficients[i, l] > 0:
                c.SetCoefficient(al[l], float(coefficients[i, l]))
        constraints += [c]

    # Objective
    obj = slv.Objective()
    for l, h in enumerate(ol):
        obj.SetCoefficient(al[l], float(h))
    obj.SetMinimization()

    # Set a time limit in milliseconds
    if tlim is not None:
        slv.SetTimeLimit(1000 * tlim)

    # Solve
    logger.debug(f"RMP variables: {slv.NumVariables()}")
    logger.debug(f"RMP constraints: {slv.NumConstraints()}")
    status = slv.Solve()
    logger.debug(f"RMP iterations: {slv.iterations()}")

    # Extract results
    duals, alphas = None, None
    objective = float("inf")
    if status in (slv.OPTIMAL, slv.FEASIBLE):
        logger.info(f"RMP solved")

        # Extract alpha values
        alphas = [al[l].solution_value() for l in range(n_layers)]
        logger.debug(f"RMP alphas: {alphas}")
        if not all(alphas[l] in (0, 1) for l in range(n_layers)):
            logger.debug("RMP solution not feasible (at least one alpha value is not binary)")

        # Extract objective value
        objective = slv.Objective().Value()
        logger.debug(f"RMP objective: {objective}")

        # Extract duals
        if relaxation:
            duals = np.array([c.DualValue() for c in constraints])
            logger.debug(f"RMP duals: {duals}")
    else:
        logger.warning("RMP unfeasible")

    logger.debug(f"RMP time: {slv.WallTime() / 1000}")
    return objective, alphas, duals
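# Rough column-generation sketch (assumed orchestration, not shown in the source):
# the duals returned by master_problem feed the pricing subproblems defined in the
# later examples, e.g.
#   objective, alphas, duals = master_problem(layer_pool, relaxation=True)
#   _, superitems_in_layer = pricing_problem_no_placement_mip(superitems_pool, pallet_dims, duals)
#   layer = pricing_problem_placement_mip(superitems_pool, superitems_in_layer, pallet_dims)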
Example #11
def plot_curve(plots,
               axes,
               dirname='./data/rgb/clic256',
               images=[],
               plot='fit',
               draw_markers=None,
               metric='ssim',
               title=None,
               add_legend=True,
               marker_legend=True,
               baseline_count=3,
               update_ylim=False):

    # Parse input parameters
    draw_markers = draw_markers if draw_markers is not None else len(
        images) == 1
    plot = helpers.utils.match_option(plot, ['fit', 'aggregate'])

    df_all, labels = load_data(plots, dirname)

    if len(images) == 0:
        images = df_all[0]['image_id'].unique().tolist()

    # Plot setup
    func, fit_bounds = setup_fit(metric)
    y_min, y_max, metric_label = setup_plot(metric)

    # Select measurements for specific images, if specified
    for dfc in df_all:
        if len(images) > 0:
            dfc['selected'] = dfc['image_id'].apply(lambda x: x in images)
        else:
            dfc['selected'] = True

    # Setup drawing styles
    styles = [['r-', 'rx'], ['b--', 'b+'], ['k:', 'k2'], ['g-', 'gx'],
              ['m-', 'gx'], ['m--', 'gx'], ['m-.', 'gx'], ['m:', 'gx']]
    avg_markers = ['', '', '', 'o', 'o', '2', '+', 'x', '^', '.']

    # To retain consistent styles across plots, adjust the lists based on the number of baseline methods
    if baseline_count < 3:
        styles = styles[(3 - baseline_count):]
        avg_markers = avg_markers[(3 - baseline_count):]

    # Iterate over defined plots and draw data accordingly
    for index, dfc in enumerate(df_all):

        x = dfc.loc[dfc['selected'], 'bpp'].values
        y = dfc.loc[dfc['selected'], metric].values

        X = np.linspace(max([0, x.min() * 0.9]), min([5, x.max() * 1.1]), 256)

        if plot == 'fit':
            # Fit individual images to a curve, then average the curves

            Y = np.zeros((len(images), len(X)))
            mse_l = []

            for image_no, image_id in enumerate(images):

                x = dfc.loc[dfc['selected'] & (dfc['image_id'] == image_id),
                            'bpp'].values
                y = dfc.loc[dfc['selected'] & (dfc['image_id'] == image_id),
                            metric].values

                # Allow for larger errors for lower SSIM values
                if metric in ['ssim', 'msssim']:
                    sigma = np.abs(1 - y).reshape((-1, ))
                else:
                    sigma = np.ones_like(y).reshape((-1, ))

                try:
                    popt, pcov = curve_fit(func,
                                           x,
                                           y,
                                           bounds=fit_bounds,
                                           maxfev=10000,
                                           sigma=sigma)
                    y_est = func(x, *popt)
                    mse = np.mean(np.power(y - y_est, 2))
                    mse_l.append(mse)
                    if mse > 0.5:
                        logger.warning(
                            'WARNING Large MSE for {}:{} = {:.2f}'.format(
                                labels[index], image_no, mse))

                except RuntimeError:
                    logger.error(
                        f'{labels[index]} image ={image_id}, bpp ={x} y ={y}')

                Y[image_no] = func(X, *popt)

            if len(images) > 1:
                logger.info(
                    'Fit summary - MSE for {} av={:.2f} max={:.2f}'.format(
                        labels[index], np.mean(mse_l), np.max(mse_l)))

            yy = np.nanmean(Y, axis=0)
            axes.plot(X,
                      yy,
                      styles[index][0],
                      label=labels[index] if add_legend else None)
            y_min = min([y_min, min(yy)]) if update_ylim else y_min

        elif plot == 'aggregate':
            # For each quality level (QF, #channels) find the average quality level
            dfa = dfc.loc[dfc['selected']]

            if 'n_features' in dfa:
                dfg = dfa.groupby('n_features')
            else:
                dfg = dfa.groupby('quality')

            x = dfg.mean()['bpp'].values
            y = dfg.mean()[metric].values

            axes.plot(x,
                      y,
                      styles[index][0],
                      label=labels[index] if add_legend else None,
                      marker=avg_markers[index],
                      alpha=0.65)
            y_min = min([y_min, min(y)]) if update_ylim else y_min

        elif plot == 'none':
            pass

        else:
            raise ValueError('Unsupported plot type!')

        if draw_markers:

            if 'entropy_reg' in dfc:

                # No need to draw legend if multiple DCNs are plotted
                detailed_legend = 'full' if marker_legend and index == baseline_count else False

                style_mapping = {}

                if 'n_features' in dfc and len(dfc['n_features'].unique()) > 1:
                    style_mapping['hue'] = 'n_features'

                if 'entropy_reg' in dfc and len(
                        dfc['entropy_reg'].unique()) > 1:
                    style_mapping['size'] = 'entropy_reg'

                if 'quantization' in dfc and len(
                        dfc['quantization'].unique()) > 1:
                    style_mapping['style'] = 'quantization'

                sns.scatterplot(data=dfc[dfc['selected']],
                                x='bpp',
                                y=metric,
                                palette="Set2",
                                ax=axes,
                                legend=detailed_legend,
                                **style_mapping)

            else:
                axes.plot(x,
                          y,
                          styles[index][1],
                          alpha=10 / (sum(dfc['selected'])))

    n_images = len(dfc.loc[dfc['selected'], 'image_id'].unique())

    title = '{} : {}'.format(
        title if title is not None else os.path.split(dirname)[-1],
        '{} images'.format(n_images) if n_images > 1 else
        dfc.loc[dfc['selected'], 'filename'].unique()[0].replace('.png', ''))

    # Fixes problems with rendering using the LaTeX backend
    if add_legend:
        for t in axes.legend().texts:
            t.set_text(t.get_text().replace('_', '-'))

    axes.set_xlim([-0.1, 3.1])
    axes.set_ylim([y_min * 0.99, y_max])
    axes.set_title(title)
    axes.set_xlabel('Effective bpp')
    axes.set_ylabel(metric_label)
Example #12
def pricing_problem_placement_mip(
    superitems_pool, superitems_in_layer, pallet_dims, tlim=None, enable_output=False
):
    """
    Solve the subproblem placement using a MIP formulation
    """
    logger.info("SP-P-MIP defining variables and constraints")

    # Store superitems dimensions
    ws, ds, _ = superitems_pool.get_superitems_dims()

    # Solver
    slv = pywraplp.Solver("SP-P-MIP", pywraplp.Solver.SCIP_MIXED_INTEGER_PROGRAMMING)

    # Enable verbose output from solver
    if enable_output:
        slv.EnableOutput()

    # Variables
    cix = {s: slv.IntVar(0, pallet_dims.width - ws[s], f"c_{s}_x") for s in superitems_in_layer}
    ciy = {s: slv.IntVar(0, pallet_dims.depth - ds[s], f"c_{s}_y") for s in superitems_in_layer}
    xsj, ysj = dict(), dict()
    for s in superitems_in_layer:
        for j in superitems_in_layer:
            if j != s:
                xsj[(s, j)] = slv.BoolVar(f"x_{s}_{j}")
                ysj[(s, j)] = slv.BoolVar(f"y_{s}_{j}")

    # Constraints
    # Enforce at least one relative positioning relationship
    # between each pair of items in a layer
    # xsj[s, j] + xsj[j, s] + ysj[s, j] + ysj[j, s] >= 1
    precedence_constraints = []
    for s in superitems_in_layer:
        for j in superitems_in_layer:
            if j > s:
                c = slv.Constraint(1, 2, f"p_{s}_{j}")
                c.SetCoefficient(xsj[s, j], 1)
                c.SetCoefficient(xsj[j, s], 1)
                c.SetCoefficient(ysj[s, j], 1)
                c.SetCoefficient(ysj[j, s], 1)
                precedence_constraints += [c]

    # Ensure that there is at most one spatial relationship
    # between items i and j along the width and depth dimensions
    # xsj[s,j] + xsj[j,s] <= 1
    # ysj[s,j] + ysj[j,s] <= 1
    precedence_x_constraints = []
    precedence_y_constraints = []
    for s in superitems_in_layer:
        for j in superitems_in_layer:
            if j > s:
                c = slv.Constraint(0, 1, f"px_{s}_{j}")
                c.SetCoefficient(xsj[s, j], 1)
                c.SetCoefficient(xsj[j, s], 1)
                precedence_x_constraints += [c]
                c = slv.Constraint(0, 1, f"py_{s}_{j}")
                c.SetCoefficient(ysj[s, j], 1)
                c.SetCoefficient(ysj[j, s], 1)
                precedence_y_constraints += [c]

    # Non-overlapping constraints
    # cix[s] + ws[s] <= cix[j] + pallet_dims.width * (1 - xsj[s, j])
    # ciy[s] + ds[s] <= ciy[j] + pallet_dims.depth * (1 - ysj[s, j])
    non_overlapping_x_constraints = []
    non_overlapping_y_constraints = []
    for s in superitems_in_layer:
        for j in superitems_in_layer:
            if j != s:
                # ciy[s] - ciy[j] + pallet_dims.depth * ysj[s, j] <= pallet_dims.depth - ds[s]
                # ciy[s] - ciy[j] + pallet_dims.depth * ysj[s, j] >= -pallet_dims.depth + ds[j]
                c = slv.Constraint(
                    -pallet_dims.width + ws[j], pallet_dims.width - ws[s], f"ox_{s}_{j}"
                )
                c.SetCoefficient(cix[s], 1)
                c.SetCoefficient(cix[j], -1)
                c.SetCoefficient(xsj[s, j], pallet_dims.width)
                non_overlapping_x_constraints += [c]
                c = slv.Constraint(
                    -pallet_dims.depth + ds[j], pallet_dims.depth - ds[s], f"oy_{s}_{j}"
                )
                c.SetCoefficient(ciy[s], 1)
                c.SetCoefficient(ciy[j], -1)
                c.SetCoefficient(ysj[s, j], pallet_dims.depth)
                non_overlapping_y_constraints += [c]
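    # Derivation note (added): the two-sided ranges above encode the big-M form
    #   cix[s] + ws[s] <= cix[j] + pallet_dims.width * (1 - xsj[s, j])
    # rearranged as cix[s] - cix[j] + pallet_dims.width * xsj[s, j] <= pallet_dims.width - ws[s]
    # (and analogously along the depth axis); the lower bounds ws[j] - width and
    # ds[j] - depth are implied by the variable domains and only keep the ranges bounded.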

    # Set a time limit
    if tlim is not None:
        slv.SetTimeLimit(1000 * tlim)

    # Solve
    logger.debug(f"SP-P-MIP variables: {slv.NumVariables()}")
    logger.debug(f"SP-P-MIP constraints: {slv.NumConstraints()}")
    status = slv.Solve()
    logger.debug(f"SP-P-MIP iterations: {slv.iterations()}")

    # Extract results
    layer = None
    if status in (slv.OPTIMAL, slv.FEASIBLE):
        logger.info(f"SP-P-MIP solved")

        # Extract coordinates
        sol = dict()
        for s in superitems_in_layer:
            sol[f"c_{s}_x"] = cix[s].solution_value()
            sol[f"c_{s}_y"] = ciy[s].solution_value()

        # Build layer
        layer = utils.build_layer_from_model_output(
            superitems_pool,
            superitems_in_layer,
            sol,
            pallet_dims,
        )
    else:
        logger.warning("SP-P-MIP unfeasible")

    logger.debug(f"SP-P-MIP time: {slv.WallTime() / 1000}")
    return layer
Example #13
def pricing_problem_placement_cp(
    superitems_pool, superitems_in_layer, pallet_dims, duals, tlim=None, enable_output=False
):
    """
    Solve the pricing subproblem placement using a CP approach
    """
    logger.info("SP-P-CP defining variables and constraints")

    # Utility
    ws, ds, _ = superitems_pool.get_superitems_dims()
    sduals = superitems_duals(superitems_pool, duals)

    # Model and Solver
    mdl = cp_model.CpModel()
    slv = cp_model.CpSolver()

    # Variables
    cblx = {
        s: mdl.NewIntVar(0, pallet_dims.width - ws[s], f"c_bl_{s}_x") for s in superitems_in_layer
    }
    cbly = {
        s: mdl.NewIntVar(0, pallet_dims.depth - ds[s], f"c_bl_{s}_y") for s in superitems_in_layer
    }
    ctrx = {s: mdl.NewIntVar(ws[s], pallet_dims.width, f"c_tr_{s}_x") for s in superitems_in_layer}
    ctry = {s: mdl.NewIntVar(ds[s], pallet_dims.width, f"c_tr_{s}_y") for s in superitems_in_layer}
    xint = [
        mdl.NewIntervalVar(cblx[s], mdl.NewConstant(ws[s]), ctrx[s], f"xint_{s}")
        for s in superitems_in_layer
    ]
    yint = [
        mdl.NewIntervalVar(cbly[s], mdl.NewConstant(ds[s]), ctry[s], f"yint_{s}")
        for s in superitems_in_layer
    ]

    # Constraints
    mdl.AddNoOverlap2D(xint, yint)
    mdl.AddCumulative(
        xint, [mdl.NewConstant(ds[s]) for s in superitems_in_layer], pallet_dims.depth
    )
    mdl.AddCumulative(
        yint, [mdl.NewConstant(ws[s]) for s in superitems_in_layer], pallet_dims.width
    )

    # Symmetry Breaking
    areas = [ws[s] * ds[s] for s in superitems_in_layer]
    area_ind = utils.argsort(areas, reverse=True)
    biggest_ind = superitems_in_layer[area_ind[0]]
    second_ind = superitems_in_layer[area_ind[1]]
    mdl.Add(cblx[biggest_ind] <= mdl.NewConstant(pallet_dims.width // 2))
    mdl.Add(cbly[biggest_ind] <= mdl.NewConstant(pallet_dims.depth // 2))
    mdl.Add(cblx[biggest_ind] <= cblx[second_ind])
    mdl.Add(cbly[biggest_ind] <= cbly[second_ind])

    # Search strategy
    indexes = utils.argsort([sduals[s] for s in superitems_in_layer], reverse=True)
    mdl.AddDecisionStrategy(
        [xint[i] for i in indexes], cp_model.CHOOSE_FIRST, cp_model.SELECT_MIN_VALUE
    )
    mdl.AddDecisionStrategy(
        [yint[i] for i in indexes], cp_model.CHOOSE_FIRST, cp_model.SELECT_MIN_VALUE
    )

    # Set a time limit in seconds
    if tlim is not None:
        slv.parameters.max_time_in_seconds = tlim

    # Solve
    slv.parameters.num_search_workers = 4
    slv.parameters.log_search_progress = enable_output
    slv.parameters.search_branching = cp_model.FIXED_SEARCH
    status = slv.Solve(mdl)

    # Extract results
    layer = None
    if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
        logger.info(f"SP-P-CP solved")

        # Extract coordinates
        sol = dict()
        for s in superitems_in_layer:
            sol[f"c_{s}_x"] = slv.Value(cblx[s])
            sol[f"c_{s}_y"] = slv.Value(cbly[s])

        # Build layer
        layer = utils.build_layer_from_model_output(
            superitems_pool,
            superitems_in_layer,
            sol,
            pallet_dims,
        )
    else:
        logger.warning("SP-P-CP unfeasible")

    logger.debug(f"SP-P-CP time: {slv.WallTime()}")
    return layer
Example #14
def pricing_problem_no_placement_cp(
    superitems_pool, pallet_dims, duals, feasibility=None, tlim=None, enable_output=False
):
    """
    Solve the pricing subproblem no-placement using a CP approach
    """
    logger.info("SP-NP-CP defining variables and constraints")

    # Model and solver
    mdl = cp_model.CpModel()
    slv = cp_model.CpSolver()

    # Utility
    fsi, _, _ = superitems_pool.get_fsi()
    ws, ds, hs = superitems_pool.get_superitems_dims()
    n_superitems, n_items = fsi.shape

    # Variables
    ol = mdl.NewIntVar(0, max(hs), f"o_l")
    zsl = [mdl.NewBoolVar(f"z_{s}_l") for s in range(n_superitems)]

    # Constraints
    # Redundant valid cuts that force the area of
    # a layer to fit within the area of a bin
    mdl.Add(
        cp_model.LinearExpr.Sum(ws[s] * ds[s] * zsl[s] for s in range(n_superitems))
        <= pallet_dims.area
    )

    # Define the height of layer l
    for s in range(n_superitems):
        mdl.Add(ol >= hs[s] * zsl[s])

    # Enforce feasible placement
    if feasibility is not None:
        logger.info(f"SP-NP-MIP feasibility: max number of selected items <= {feasibility}")
        mdl.Add(cp_model.LinearExpr.Sum(zsl[s] for s in range(n_superitems)) <= feasibility)

    # No item repetition constraint
    for i in range(n_items):
        mdl.Add(cp_model.LinearExpr.Sum([fsi[s, i] * zsl[s] for s in range(n_superitems)]) <= 1)

    # Objective
    obj = ol - cp_model.LinearExpr.Sum(
        int(np.ceil(duals[i])) * fsi[s, i] * zsl[s]
        for i in range(n_items)
        for s in range(n_superitems)
    )
    mdl.Minimize(obj)

    # Search strategy
    duals_sort_index = utils.argsort(
        [sum([fsi[s, i] * duals[i] for i in range(n_items)]) for s in range(n_superitems)]
    )
    mdl.AddDecisionStrategy([ol], cp_model.CHOOSE_FIRST, cp_model.SELECT_MIN_VALUE)
    mdl.AddDecisionStrategy(
        [zsl[s] for s in duals_sort_index],
        cp_model.CHOOSE_FIRST,
        cp_model.SELECT_MAX_VALUE,
    )

    # Set a time limit in seconds
    if tlim is not None:
        slv.parameters.max_time_in_seconds = tlim

    # Solve
    slv.parameters.num_search_workers = 4
    slv.parameters.log_search_progress = enable_output
    slv.parameters.search_branching = cp_model.FIXED_SEARCH
    status = slv.Solve(mdl)

    # Extract results
    objective = float("inf")
    superitems_in_layer = None
    if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
        logger.info(f"SP-NP-CP solved")

        # Extract objective value
        objective = slv.ObjectiveValue()
        logger.debug(f"SP-NP-CP objective: {objective}")

        # Extract selected superitems
        superitems_in_layer = [s for s in range(n_superitems) if slv.Value(zsl[s]) == 1]
        logger.debug(f"SP-NP-CP selected {len(superitems_in_layer)}/{n_superitems} superitems")

        logger.debug(f"SP-NP-CP computed layer height: {slv.Value(ol)}")
    else:
        logger.warning("SP-NP-CP unfeasible")

    logger.debug(f"SP-NP-CP time: {slv.WallTime()}")
    return objective, superitems_in_layer
Example #15
def pricing_problem_no_placement_mip(
    superitems_pool, pallet_dims, duals, feasibility=None, tlim=None, enable_output=False
):
    """
    Solve the pricing subproblem no-placement using a MIP approach
    """
    logger.info("SP-NP-MIP defining variables and constraints")

    # Solver
    slv = pywraplp.Solver("SP-NP-MIP", pywraplp.Solver.SCIP_MIXED_INTEGER_PROGRAMMING)

    # Enable verbose output from solver
    if enable_output:
        slv.EnableOutput()

    # Utility
    ws, ds, hs = superitems_pool.get_superitems_dims()
    sduals = superitems_duals(superitems_pool, duals)
    n_superitems = len(superitems_pool)

    # Variables
    ol = slv.IntVar(0, max(hs), f"o_l")
    zsl = [slv.BoolVar(f"z_{s}_l") for s in range(n_superitems)]

    # Constraints
    # Redundant valid cuts that force the area of
    # a layer to fit within the area of a bin
    # ws * ds * zsl <= pallet_dims.area
    area = slv.Constraint(0, pallet_dims.area, "area")
    for s in range(n_superitems):
        area.SetCoefficient(zsl[s], ws[s] * ds[s])

    # Define layer height
    # ol >= zsl * hs
    height_constraints = []
    for s in range(n_superitems):
        hc = slv.Constraint(0, max(hs), f"hc_{s}")
        hc.SetCoefficient(ol, 1)
        hc.SetCoefficient(zsl[s], hs[s])
        height_constraints += [hc]

    # Enforce feasible placement
    # sum(zsl) <= feasibility
    if feasibility is not None:
        logger.info(f"SP-NP-MIP feasibility: max number of selected items <= {feasibility}")
        f = slv.Constraint(1, feasibility, "feasibility")
        for s in range(n_superitems):
            f.SetCoefficient(zsl[s], 1)

    # Compute reward for greater number of selected superitems
    reward = 1 / (sduals.max() + n_superitems)
    zero_reward = np.where(sduals == 0, reward, 0)
    logger.debug(f"SP-NP-MIP zero duals reward: {reward}")

    # Objective
    # ol - sum(zsl * (sduals + zero_reward))
    obj = slv.Objective()
    obj.SetCoefficient(ol, 1)
    for s in range(n_superitems):
        obj.SetCoefficient(zsl[s], -sduals[s] - zero_reward[s])
    obj.SetMinimization()

    # Set a time limit in milliseconds
    if tlim is not None:
        slv.SetTimeLimit(1000 * tlim)

    # Solve
    logger.debug(f"SP-NP-MIP variables: {slv.NumVariables()}")
    logger.debug(f"SP-NP-MIP constraints: {slv.NumConstraints()}")
    status = slv.Solve()
    logger.debug(f"SP-NP-MIP iterations: {slv.iterations()}")

    # Extract results
    objective = float("inf")
    superitems_in_layer = None
    if status in (slv.OPTIMAL, slv.FEASIBLE):
        logger.info(f"SP-NP-MIP solved")

        # Extract objective value
        objective = slv.Objective().Value()
        logger.debug(f"SP-NP-MIP objective: {objective}")

        # Extract selected superitems
        superitems_in_layer = [s for s in range(n_superitems) if zsl[s].solution_value() == 1]
        logger.debug(f"SP-NP-MIP selected {len(superitems_in_layer)}/{n_superitems} superitems")

        logger.debug(f"SP-NP-MIP computed layer height: {ol.solution_value()}")
    else:
        logger.warning("SP-NP-MIP unfeasible")

    logger.debug(f"SP-NP-MIP time: {slv.WallTime() / 1000}")
    return objective, superitems_in_layer
Example #16
    def merge_args(cls, args, task_args):
        from dataclasses import asdict
        for k, v in asdict(task_args).items():
            setattr(args, k, v)
        return args


def generate_method_kwargs_from_arguments(cls, method, args: dict):
    import inspect
    from dataclasses import asdict
    valid_kwargs = inspect.signature(cls.__dict__[method]).parameters
    kwargs = dict((name, args[name]) for name in valid_kwargs if name in args)
    return kwargs


def create_instance_from_arguments(cls, args: dict):
    kwargs = generate_method_kwargs_from_arguments(cls,
                                                   method="__init__",
                                                   args=args)
    return cls(**kwargs)


if __name__ == '__main__':
    task_args = TaskArguments.parse_args()
    logger.debug(f"{task_args}")
    logger.info(f"{asdict(task_args)}")

    default_args = TaskArguments()
    comp_result = task_args.compare(default_args)
    logger.warning(f"{comp_result}")
Example #17
def long_task(
        self, data_list: dict, project_name: str, need_email: str, email_address: str
):
    """

    Args:
        email_address: 邮件地址
        need_email: 需要发送邮件
        project_name: 项目名称
        self:
        data_list: {
            nodes:[]
            relationships:[]
        }

    Returns:

    """
    t1 = time.time()

    port = PortDetect(7777, 7800).get_available_range()
    pwd = generate_password()
    id_ = self.request.id

    # once the JS client sees data['state'] != 'CREATE NEO4J SANDBOX', show the configuration (display only)
    self.update_state(
        state="CREATE NEO4J SANDBOX",
        meta={"current": 1, "total": 4, "status": "", "port": port, "password": pwd},
    )
    update_task_process(id_, 1)

    container = None
    try:
        client = docker.from_env()
        container = client.containers.run(
            get_neo4j_version(),
            detach=True,
            environment=[f"NEO4J_AUTH=neo4j/{pwd}"],
            volumes={get_neo_file_path() + id_: {"bind": "/data", "mode": "rw"}},
            # remove=True,
            ports={"7687/tcp": port},
            name=id_,
        )
        config = {
            "container_id": container.id,
            "port": port,
            "password": pwd,
            "task_id": id_,
        }

        self.update_state(
            state="INITIALIZE NEO4J", meta={"current": 2, "total": 4, "status": ""}
        )
        update_task_process(id_, 2)
    except Exception as _e:
        if container and container.status != "exited":
            container.stop()
            container.remove()
        change_project_status(id_, status=0)
        raise Exception(_e)

    # wait for Neo4j inside the sandbox to finish starting up
    connect_failed_times = 0
    while connect_failed_times <= 4:
        time.sleep(15)
        try:
            Graph("bolt://{}:{}".format(NEO_HOST, str(port)), password=pwd)
            break
        except Exception as _e:
            logger.warning(
                f"{id_} connect failed, "
                f"retry {connect_failed_times},"
                f" error is {_e}"
            )
            connect_failed_times += 1
            continue

    if connect_failed_times > 4:
        if container and container.status != "exited":
            container.stop()
            container.remove()
        change_project_status(id_, status=0)
        raise Exception("time over")

    self.update_state(state="FILL DATA", meta={"current": 3, "total": 4})
    update_task_process(id_, 3)
    # build the subgraph
    try:
        base_graph = Graph(
            "bolt://" + NEO_HOST + ":" + SecureInfo.get_neo4j_port(),
            password=SecureInfo.get_neo4j_password(),
        )
        new_graph = Graph("bolt://" + NEO_HOST + ":" + str(port), password=pwd)

        nodes = data_list["nodes"]
        # relationships are ignored for now
        # relationships = data_list['relationships']
        subgraph_list = []

        # r = redis.Redis(host=REDIS_HOST, port=6379, db=1)

        for node in nodes:
            subgraph = get_remote_graph_from_one_node(base_graph, node)
            if subgraph:
                # r.lpush(f"{id_}-midway-success", node["name"])
                subgraph_list.append(subgraph)
            # else:
            # r.lpush(f"{id_}-midway-failed", node["name"])

        if len(subgraph_list) > 1:
            subgraph_fin = reduce(lambda x, y: x | y, subgraph_list)
            new_graph.create(subgraph_fin)
        elif len(subgraph_list) == 1:
            subgraph_fin = subgraph_list[0]
            new_graph.create(subgraph_fin)
        else:
            pass
    except Exception as _e:
        logger.error(f"{id_} failed, {container.status}")
        logger.exception(_e)
        if container and container.status != "exited":
            container.stop()
            container.remove()
        change_project_status(id_, status=0)
        raise Exception(_e)

    # self.update_state(state='COMPLETE',
    #                   meta={'current': 4, 'total': 4})

    runtime = round(time.time() - t1, 4)
    change_project_status(id_, config, runtime)

    if need_email:
        html = generate_mail_html(pwd, port, SERVER, runtime)
        send_mail(
            subject=f"neo create success, project: {project_name}",
            receivers=[email_address],
            html=html,
        )

    time.sleep(2)
    update_task_process(id_, 4)
    # the password and port could be added to the return value so users can connect to Neo4j themselves
    return {"current": 4, "total": 4, "config": config, "status": "Task completed!"}
Beispiel #18
0
def run(args):
    global GECKO, logger
    GECKO = args.gecko_path
    if not GECKO:
        logger.error("Must specify --gecko-path.")
        sys.exit(1)

    if not Path(GECKO).is_dir():
        if args.clone:
            clone_gecko()
        else:
            logger.error(f"Gecko path '{GECKO}' does not exist! Pass --clone to clone it to this location.")
            sys.exit(1)

    # initialize schedulers to analyze
    cwd = os.getcwd()
    strategy_dir = here / "strategies"
    strategy_paths = [s for s in strategy_dir.glob("*.py") if s.name != "__init__.py"]

    schedulers = []
    for path in strategy_paths:
        logger.debug(f"Creating scheduler using strategy from {path.relative_to(cwd)}")
        schedulers.append(Scheduler(path))

    # use what was actually scheduled as a baseline comparison
    schedulers.append(Scheduler("baseline"))

    # compute pushes in range
    pushes = make_push_objects(
        from_date=args.from_date, to_date=args.to_date, branch=args.branch
    )
    orig_rev = hg(["log", "-r", ".", "-T", "{node}"])
    logger.debug(f"Found previous revision: {orig_rev}")

    try:
        for i, push in enumerate(pushes):
            logger.info(f"Analyzing https://treeherder.mozilla.org/#/jobs?repo=autoland&revision={push.rev} ({i+1}/{len(pushes)})")  # noqa

            hg(["update", push.rev])

            for scheduler in schedulers:
                logger.opt(ansi=True).debug(f"<cyan>Scheduler {scheduler.name}</cyan>")
                try:
                    scheduler.analyze(push)
                except MissingDataError:
                    logger.warning(f"MissingDataError: Skipping {push.rev}")

    finally:
        logger.debug("restoring repo")
        hg(["update", orig_rev])

    header = [
        "Scheduler",
        "Total Tasks",
        "Primary Backouts",
        "Secondary Backouts",
        "Secondary Backout Rate",
        "Scheduler Efficiency",
    ]

    data = []
    for sched in schedulers:
        s = sched.score
        data.append([
            sched.name,
            s.tasks,
            s.primary_backouts,
            s.secondary_backouts,
            s.secondary_backout_rate,
            s.scheduler_efficiency,
        ])

    data.sort(key=lambda x: x[-1], reverse=True)
    data.insert(0, header)
    return data
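
run() relies on an hg() helper that is not shown. One plausible shape for it is a thin subprocess wrapper around Mercurial (an assumption, not the original implementation):

import subprocess

def hg(args):
    # run a Mercurial command inside the Gecko checkout and return its stdout
    proc = subprocess.run(
        ["hg"] + list(args),
        cwd=GECKO,
        capture_output=True,
        text=True,
        check=True,
    )
    return proc.stdout.strip()
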
Beispiel #19
0
 def plot(self, data_num):
     logger.warning("Warning: Python prints plots in a stupid stupid way!")
     plt.imshow(np.log(self.intensity_mats[data_num][-1:0:-1] + 1),
                aspect='auto')
Beispiel #20
0
        "date_born": "***********",
        "nationality": "BELARUS",
        "passport_number": "*********",
        "expare_date": "*************",
        "code_phone": "44",
        "phone_number": "************",
        "email": "*******************"
    }]
}

driver = init_driver(logger, options)
centre, category, sub_category = get_centre_category_sub_category(driver)
count = 1
while True:
    try:
        logger.warning(f"try to booking {count=}")
        sleep(randint(3, 5) + random())
        centre.send_keys(options['center'])

        sleep(randint(3, 5) + random())
        category.send_keys(options['category'])

        sleep(randint(5, 8) + random())
        sub_category.send_keys(options['sub_category'])

        sleep(random())
        continue_btn = driver.find_element_by_xpath(
            "/html/body/app-root/div/app-eligibility-criteria/section/form/mat-card[2]/button"
        )
        if continue_btn.is_enabled():
            logger.warning("continue button is enable")
Beispiel #21
0
    def __init__(self):

        logger.debug('tako')
        logger.warning('ika')
        logger.info('日本人')
Beispiel #22
0
    def train(self, args, train_examples, eval_examples):

        logger.info(
            f"Start train: {len(train_examples)} train examples, {len(eval_examples)} eval examples."
        )

        self.save_args(args, args.latest_dir)

        #  if is_master_process(args):
        #      tb_writer = SummaryWriter()

        train_dataset, _ = self.examples_to_dataset(train_examples,
                                                    args.train_max_seq_length)
        train_dataloader = generate_dataloader(
            args,
            train_dataset,
            batch_size=args.per_gpu_train_batch_size,
            keep_order=False,
            collate_fn=self.collate_fn)
        logger.info(f"Start training ...")
        logger.info(f"  Num examples    = {len(train_examples)}")
        logger.info(f"  Num epoch steps = {len(train_dataloader)}")
        logger.info(f"  Num epochs = {args.num_train_epochs}")
        logger.info(f"  Batch size = {args.per_gpu_train_batch_size}")

        steps_per_epoch = len(
            train_dataloader) // args.gradient_accumulation_steps

        if args.max_steps > 0:
            total_steps = args.max_steps
            args.num_train_epochs = args.max_steps // steps_per_epoch + 1
        else:
            total_steps = steps_per_epoch * args.num_train_epochs
        args.total_steps = total_steps

        logger.info(
            f"  Gradient Accumulation steps = {args.gradient_accumulation_steps}"
        )
        logger.info(f"  Total optimization steps = {total_steps}")

        model, optimizer, scheduler = self.build_model(args)
        tokenizer = self.tokenizer

        # Check if saved optimizer or scheduler states exist
        model_path = Path(args.model_path)
        optimizer_saved_file = model_path / "optimizer.pt"
        scheduler_saved_file = model_path / "scheduler.pt"
        if optimizer_saved_file.exists() and scheduler_saved_file.exists():
            optimizer.load_state_dict(torch.load(optimizer_saved_file))
            scheduler.load_state_dict(torch.load(scheduler_saved_file))

        if args.fp16:
            try:
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://github.com/nvidia/apex to use fp16."
                )
            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args.fp16_opt_level)
            args.amp = amp
        # multi-gpu training (should be after apex fp16 initialization)
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
        # Distributed training (should be after apex fp16 initialization)
        #  if args.local_rank != -1:
        if is_multi_processes(args):
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[args.local_rank],
                output_device=args.local_rank,
                find_unused_parameters=True)

        trained_steps = 0
        epochs_trained = 0
        steps_trained_in_current_epoch = 0
        # Check if continuing training from a checkpoint
        model_path = Path(args.model_path)
        output_dir = Path(args.output_dir)

        if model_path.exists() and "checkpoint" in str(model_path):
            # set trained_steps to trained_steps of last saved checkpoint from model path
            trained_steps = int(model_path.parts[-1].split("-")[-1])
            epochs_trained = trained_steps // (
                len(train_dataloader) // args.gradient_accumulation_steps)
            steps_trained_in_current_epoch = trained_steps % (
                len(train_dataloader) // args.gradient_accumulation_steps)

            logger.info(
                "  Continuing training from checkpoint, will skip to saved trained_steps"
            )
            logger.info("  Continuing training from epoch %d", epochs_trained)
            logger.info("  Continuing training from global step %d",
                        trained_steps)
            logger.info("  Will skip the first %d steps in the first epoch",
                        steps_trained_in_current_epoch)

        train_loss, logging_loss = 0.0, 0.0

        best_index = args.best_index
        if best_index in ['loss']:
            best_value = float('inf')
            best_type = 'min'
        else:
            best_value = 0.0
            best_type = 'max'  # ['max', 'min']

        model.zero_grad()

        # https://arxiv.org/abs/2002.10345
        # https://github.com/lonePatient/BERT-SDA
        enable_kd = args.enable_kd
        if enable_kd:
            logger.warning("Enable knowledge distillation.")
            from torch.nn import MSELoss
            kd_loss_fct = MSELoss()
            kd_model = copy.deepcopy(model)
            kd_model.eval()

        enable_sda = args.enable_sda
        if enable_sda:
            from torch.nn import MSELoss
            sda_loss_fct = MSELoss()
            history_logits = []
            sda_teachers = args.sda_teachers
            sda_stategy = args.sda_stategy

            if args.sda_empty_first:
                teacher_models = []
            else:
                t_model = copy.deepcopy(model)
                t_model.eval()
                teacher_models = [t_model]

            #  best_logits = []

        #  train_iterator = trange(
        #      epochs_trained,
        #      int(args.num_train_epochs),
        #      desc="Epoch",
        #      disable=args.local_rank not in [-1, 0],
        #  )
        #  for epoch in train_iterator:
        for epoch in range(epochs_trained, args.num_train_epochs):

            #  epoch_iterator = tqdm(train_dataloader,
            #                        desc="Iteration",
            #                        disable=args.local_rank not in [-1, 0])
            #  for step, batch in enumerate(epoch_iterator):

            pbar = Progbar(target=len(train_dataloader),
                           stateful_metrics=['loss'],
                           desc=f"Epoch({epoch+1}/{args.num_train_epochs})")
            for step, batch in enumerate(train_dataloader):
                if steps_trained_in_current_epoch > 0:
                    steps_trained_in_current_epoch -= 1
                    continue

                model.train()
                batch = tuple(t.to(args.device) for t in batch)

                outputs = self.on_train_step(args, model, step, batch)
                #  inputs = self.batch_to_inputs(args, batch)
                #  outputs = model(**inputs)

                #  logger.debug(f"outputs: {outputs}")
                # -------- loss --------
                loss, logits = outputs[:2]

                if enable_kd:
                    inputs = self.batch_to_inputs(args, batch)
                    if "labels" in inputs:
                        inputs['labels'] = None
                    with torch.no_grad():
                        #  kd_logits = kd_model(**inputs)[0]
                        kd_logits = kd_model(**inputs)[1]
                    kd_loss = kd_loss_fct(outputs[1], kd_logits)
                    loss += args.kd_coeff * kd_loss

                if enable_sda:
                    if teacher_models:
                        inputs = self.batch_to_inputs(args, batch)
                        if "labels" in inputs:
                            inputs['labels'] = None
                        with torch.no_grad():
                            teacher_logits = [
                                m(**inputs)[1] for m in teacher_models
                            ]
                        teacher_logits = torch.stack(teacher_logits)
                        teacher_logits = torch.mean(teacher_logits, dim=0)
                        sda_loss = sda_loss_fct(logits, teacher_logits)
                        #  sda_loss = Variable(sda_loss, requires_grad=True)
                        loss += sda_loss * args.sda_coeff

                    #  if best_logits:
                    #      sda_logits = torch.stack(best_logits)
                    #      sda_logits = torch.mean(sda_logits, dim=0)
                    #      if sda_logits.shape[0] == logits.shape[0]:
                    #          sda_loss = sda_loss_fct(logits, sda_logits)
                    #          #  sda_loss = Variable(sda_loss, requires_grad=True)
                    #          loss += sda_loss * args.sda_coeff

                #  loss = Variable(loss, requires_grad=True)
                #  inputs = self.batch_to_inputs(args, batch)
                #  logger.debug(f"inputs: {inputs}")
                #  logger.info(f"loss: {loss}")

                if loss is None:
                    continue

                if args.n_gpu > 1:
                    loss = loss.mean()
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                lr = scheduler.get_last_lr()[0]
                #  lr = scheduler.get_lr()[0]
                pbar.update(step + 1,
                            values=[('lr', lr), ('loss', loss.item())])
                train_loss += loss.item()

                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        torch.nn.utils.clip_grad_norm_(
                            amp.master_params(optimizer), args.max_grad_norm)
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       args.max_grad_norm)
                    optimizer.step()
                    scheduler.step()
                    model.zero_grad()
                    trained_steps += 1

                    if enable_kd:
                        decay = min(args.kd_decay,
                                    (1 + trained_steps) / (10 + trained_steps))
                        one_minus_decay = 1.0 - decay
                        with torch.no_grad():
                            parameters = [
                                p for p in model.parameters()
                                if p.requires_grad
                            ]
                            for s_param, param in zip(kd_model.parameters(),
                                                      parameters):
                                s_param.sub_(one_minus_decay *
                                             (s_param - param))
                    if enable_sda:
                        decay = min(args.sda_decay,
                                    (1 + trained_steps) / (10 + trained_steps))
                        one_minus_decay = 1.0 - decay
                        with torch.no_grad():
                            for sda_model in teacher_models:
                                parameters = [
                                    p for p in model.parameters()
                                    if p.requires_grad
                                ]
                                for s_param, param in zip(
                                        sda_model.parameters(), parameters):
                                    s_param.sub_(one_minus_decay *
                                                 (s_param - param))

                # -------- Save models --------
                if is_master_process(
                        args) and trained_steps % steps_per_epoch == 0:

                    # -------- Save checkpoint --------
                    if args.save_checkpoints:
                        checkpoint_dir = f"checkpoint-{trained_steps}"

                        #  checkpoint_path = output_dir / checkpoint_dir
                        checkpoint_path = Path(
                            args.latest_dir) / checkpoint_dir

                        self.save_model(args, model, tokenizer, optimizer,
                                        scheduler, checkpoint_path)

                    # -------- Evaluate --------
                    if not args.no_eval_on_each_epoch:
                        logger.info(
                            f"Epoch({epoch+1}/{args.num_train_epochs}) evaluating."
                        )
                        eval_logs = {}
                        eval_results = self.evaluate(args, model,
                                                     eval_examples)
                        for key, value in eval_results.items():
                            eval_key = "eval_{}".format(key)
                            eval_logs[eval_key] = f"{value:.6f}"
                        loss_scalar = (train_loss -
                                       logging_loss) / steps_per_epoch
                        learning_rate_scalar = scheduler.get_last_lr()[0]
                        #  learning_rate_scalar = scheduler.get_lr()[0]
                        eval_logs[
                            "learning_rate"] = f"{learning_rate_scalar:.6f}"
                        eval_logs["loss_scalar"] = f"{loss_scalar:.6f}"
                        #  for key, value in eval_logs.items():
                        #      tb_writer.add_scalar(key, value, trained_steps)
                        logger.debug(
                            json.dumps({
                                **eval_logs,
                                **{
                                    "step": trained_steps
                                }
                            }))
                        logging_loss = train_loss

                        # -------- Save best model --------
                        #  best_index = 'f1'  # ['f1', 'acc', 'recall', 'loss']
                        best_index = args.best_index
                        eval_value = eval_results[best_index]
                        is_best = False
                        if best_index in ['loss']:
                            if eval_value < best_value:
                                is_best = True
                        else:
                            if eval_value > best_value:
                                is_best = True
                        if is_best:
                            logger.warning(
                                f"Best {best_index}: {eval_value:.6f} ({eval_value - best_value:.6f})"
                            )
                            #  if enable_sda:
                            #      best_logits.append(logits.detach())
                            #      if len(best_logits) > sda_teachers:
                            #          best_logits = best_logits[1:]

                            best_value = eval_value

                            #  bestmodel_path = output_dir / f"best_fold{args.fold}"
                            bestmodel_path = Path(args.latest_dir) / "best"

                            self.save_model(args, model, tokenizer, optimizer,
                                            scheduler, bestmodel_path)

                            if args.save_checkpoints:
                                best_symlink = output_dir / "best/best_checkpoint"
                                if best_symlink.is_symlink():
                                    best_symlink.unlink()
                                os.symlink(f"../{checkpoint_dir}",
                                           best_symlink)
                        else:
                            logger.info(
                                f"dev-{best_index}/best-{best_index}: {eval_value:.6f}/{best_value:.6f}"
                            )

            #  if enable_sda:
            #      best_logits.append(logits.detach())
            #      if len(best_logits) > sda_teachers:
            #          best_logits = best_logits[1:]

            if enable_sda:
                if sda_stategy == "recent_models":
                    if len(teacher_models) >= sda_teachers:
                        teacher_models = teacher_models[1:sda_teachers + 1]
                    t_model = copy.deepcopy(model)
                    t_model.eval()
                    teacher_models.append(t_model)
                elif sda_stategy == "earliest_models":
                    if len(teacher_models) < sda_teachers:
                        t_model = copy.deepcopy(model)
                        t_model.eval()
                        teacher_models.append(t_model)
                elif sda_stategy == 'latest_model':
                    t_model = copy.deepcopy(model)
                    t_model.eval()
                    teacher_models = [t_model]
                elif sda_stategy == 'clone_models':
                    if len(teacher_models) == 0:
                        t_model = copy.deepcopy(model)
                    else:
                        t_model = copy.deepcopy(teacher_models[-1])
                    t_model.eval()
                    teacher_models.append(t_model)
                    if len(teacher_models) > sda_teachers:
                        teacher_models = teacher_models[1:sda_teachers + 1]

                #  if len(teacher_models) < sda_teachers:
                #      if teacher_models:
                #          tmodel = copy.deepcopy(teacher_models[-1])
                #      else:
                #          t_model = copy.deepcopy(model)
                #      t_model.eval()
                #      teacher_models.append(t_model)

            print(" ")
            if 'cuda' in str(args.device):
                torch.cuda.empty_cache()

        #      if args.max_steps > 0 and trained_steps > args.max_steps:
        #          epoch_iterator.close()
        #          break
        #  if args.max_steps > 0 and trained_steps > args.max_steps:
        #      train_iterator.close()
        #      break

        #  if is_multi_processes(args):
        #      tb_writer.close()
        return trained_steps, train_loss / trained_steps
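
The knowledge-distillation and SDA branches above update their teacher models with the same exponential-moving-average step. A standalone sketch of that update (a hypothetical helper, not part of the original):

import torch

def ema_update(teacher, student, decay):
    # teacher <- decay * teacher + (1 - decay) * student, applied in place
    one_minus_decay = 1.0 - decay
    with torch.no_grad():
        student_params = [p for p in student.parameters() if p.requires_grad]
        for t_param, s_param in zip(teacher.parameters(), student_params):
            t_param.sub_(one_minus_decay * (t_param - s_param))
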
Beispiel #23
0
def load_session_data(session: dict, key: dict, sampling_rate: int):
    """
        loads and cleans up the bonsai data for one session
    """
    logger.debug(
        f'Loading Bonsai Behavior data for session: {session["name"]}')

    # load analog
    analog = load_bin(session["ai_file_path"],
                      nsigs=session["n_analog_channels"])

    # get analog inputs between frames start/end times
    n_samples_per_frame = int(sampling_rate / 60)
    end_cut = (session["trigger_times"][-1] + session["bonsai_cut_start"] +
               n_samples_per_frame)
    _analog = analog[session["bonsai_cut_start"]:end_cut] / 5

    # get signals in high sampling rate
    analog_data = dict(
        pump=5 - _analog[:, 1],
        speaker=_analog[:, 2]  # prepend "5 - " to invert the signal
    )

    # go from samples to frame times
    for name, sample_values in analog_data.items():
        frames_values = sample_values[::n_samples_per_frame]

        if len(frames_values) != session["n_frames"]:
            raise ValueError("Wrong number of frames")

        # add to key
        key[name] = frames_values

    # load csv data
    try:
        logger.debug(f"Loading CSV file ({size(session['csv_file_path'])})")
        data = pd.read_csv(session["csv_file_path"])
    except Exception:
        logger.warning(f'Failed to open csv for {session["name"]}')
        return None

    if len(data.columns) < 5:
        logger.warning("Skipping because of incomplete CSV")
        return None  # first couple recordings didn't save all data

    logger.debug("Data loaded, cleaning it up")
    data.columns = [
        "ROI activity",
        "lick ROI activity",
        "mouse in ROI",
        "mouse in lick ROI",
        "deliver reward signal",
        "reward available signal",
    ]

    # make sure csv data has same length as the number of frames (off by max 2)
    delta = session["n_frames"] - len(data)
    if delta > 2:
        raise ValueError(
            f"We got {session['n_frames']} frames but CSV data has {len(data)} rows"
        )
    if delta < 0:
        # more CSV rows than frames is not handled (np.zeros would fail below)
        raise NotImplementedError("This case is not covered")
    pad = np.zeros(delta)

    # add key entries
    key["reward_signal"] = np.concatenate(
        [data["deliver reward signal"].values, pad])
    key["trigger_roi"] = np.concatenate([data["mouse in ROI"].values, pad])
    key["reward_roi"] = np.concatenate([data["mouse in lick ROI"].values, pad])
    key["reward_available_signal"] = np.concatenate(
        [data["reward available signal"].values, pad])
    return key
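
A toy illustration of the sample-to-frame downsampling performed above (the 30 kHz sampling rate is an assumption; the code only fixes 60 fps):

import numpy as np

sampling_rate = 30_000
n_samples_per_frame = int(sampling_rate / 60)   # 500 samples per video frame
sample_values = np.arange(sampling_rate)        # one second of fake analog data
frames_values = sample_values[::n_samples_per_frame]
assert len(frames_values) == 60                 # one value per frame
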
Beispiel #24
0
 def _store_symlink(self, job, tempdir, archive, member, **kwargs):
     """ this should handle storing metadata for symlink - I'm guessing we shouldn't try to read this as a file? """
     logger.warning(
         f"_store_link is not actually implemented: {self} {tempdir} {archive} {kwargs} {member} {job}"
     )
Beispiel #25
0
        Run the Attention model, and save predictions
        """

        x_train, x_test, y_train, y_test = self.preprocess()

        predicted, observed = self.fit_model(x_train, x_test, y_train, y_test)

        self.save_results(predicted, observed, output_dir)


if __name__ == '__main__':

    output_path = sys.argv[1]  # Where to save outputs
    if not os.path.exists(output_path):
        # Make the directory
        logger.warning(f"{output_path} does not exist, creating")
        os.makedirs(output_path)
    logger.info(f"Outputs will be saved to {output_path}")

    # For parallel runs, use task id from SLURM array job.
    # Passed in via env variable
    try:
        test_bear_idx = int(os.getenv("SLURM_ARRAY_TASK_ID"))
        logger.info(f"Index: {test_bear_idx}")
    except TypeError:
        # SLURM_ARRAY_TASK_ID is unset when not run as an array job (interactive mode)
        logger.warning("Not running as a SLURM array job, index is set to 0")
        test_bear_idx = 0  # Fall back to index 0

    all_bears = unpack_data()
Beispiel #26
0
    def from_config(environment: Environment, cfg: Config) -> 'Session':
        """Creates a new repair session according to a given configuration."""
        logger.debug('preparing patch directory')
        dir_patches = cfg.dir_patches
        if os.path.exists(dir_patches):
            logger.warning("clearing existing patch directory")
            for fn in glob.glob(f'{dir_patches}/*.diff'):
                if os.path.isfile(fn):
                    os.remove(fn)
        logger.debug('prepared patch directory')

        # ensure that Kaskara is installed
        logger.info('ensuring that kaskara installation is complete '
                    '(this may take 20 minutes if Kaskara is not up-to-date)')
        kaskara.post_install()
        logger.info('ensured that kaskara installation is complete')

        # seed the RNG
        # FIXME use separate RNG for each session
        random.seed(cfg.seed)

        logger.info(f"using {cfg.threads} threads")
        logger.info(f"using language: {cfg.program.language.value}")
        logger.info(f"using optimizations: {cfg.optimizations}")
        logger.info(f"using coverage config: {cfg.coverage}")
        logger.info(f"using random number generator seed: {cfg.seed}")

        if not cfg.terminate_early:
            logger.info(
                "search will continue after an acceptable patch has been discovered"
            )
        else:
            logger.info(
                "search will terminate when an acceptable patch has been discovered"
            )

        # create the resource tracker
        resources = ResourceUsageTracker.with_limits(cfg.resource_limits)
        logger.info(str(cfg.resource_limits))

        # build program
        logger.debug("building program...")
        program = cfg.program.build(environment)
        logger.debug(f"built program: {program}")

        # compute coverage
        logger.info("computing coverage information...")
        coverage = cfg.coverage.build(environment, program)
        logger.info("computed coverage information")
        logger.debug(f"coverage: {coverage}")

        # compute localization
        logger.info("computing fault localization...")
        localization = \
            Localization.from_config(coverage, cfg.localization)
        logger.info(f"computed fault localization:\n{localization}")

        # determine implicated files
        files = localization.files

        if program.language in (Language.CPP, Language.C):
            kaskara_project = kaskara.Project(
                dockerblade=environment.dockerblade,
                image=program.image,
                directory=program.source_directory,
                files=files)
            analyser = kaskara.clang.ClangAnalyser()
            analysis = analyser.analyse(kaskara_project)
        elif program.language == Language.PYTHON:
            kaskara_project = kaskara.Project(
                dockerblade=environment.dockerblade,
                image=program.image,
                directory=program.source_directory,
                files=files)
            analyser = kaskara.python.PythonAnalyser()
            analysis = analyser.analyse(kaskara_project)
        else:
            analysis = None

        # build problem
        problem = Problem.build(environment=environment,
                                config=cfg,
                                language=program.language,
                                program=program,
                                coverage=coverage,
                                analysis=analysis)

        logger.info("constructing database of donor snippets...")
        snippets: SnippetDatabase
        if analysis is not None:
            snippets = StatementSnippetDatabase.from_kaskara(analysis, cfg)
        else:
            snippets = LineSnippetDatabase.for_problem(problem)
        logger.info(
            f"constructed database of donor snippets: {len(snippets)} snippets"
        )  # noqa

        transformations = cfg.transformations.build(problem, snippets,
                                                    localization)  # noqa
        searcher = cfg.search.build(problem,
                                    resources=resources,
                                    transformations=transformations,
                                    localization=localization,
                                    threads=cfg.threads)

        # build session
        return Session(dir_patches=dir_patches,
                       resources=resources,
                       problem=problem,
                       searcher=searcher,
                       terminate_early=cfg.terminate_early)
Beispiel #27
0
def run(args):
    hosts = args.node or DEFAULT_NODES

    if not args.verbose:
        LOG.remove()
        LOG.add(
            sys.stdout,
            format="<green>[{time:HH:mm:ss.SSS}]</green> {message}",
        )
        LOG.disable("infra")
        LOG.disable("ccf")

    LOG.info(
        f"Starting {len(hosts)} CCF node{'s' if len(hosts) > 1 else ''}...")
    if args.enclave_type == "virtual":
        LOG.warning("Virtual mode enabled")

    with infra.network.network(
            hosts=hosts,
            binary_directory=args.binary_dir,
            library_directory=args.library_dir,
            dbg_nodes=args.debug_nodes,
    ) as network:
        if args.recover:
            args.label = args.label + "_recover"
            LOG.info("Recovering network from:")
            LOG.info(f" - Common directory: {args.common_dir}")
            LOG.info(f" - Ledger: {args.ledger_dir}")
            if args.snapshot_dir:
                LOG.info(f" - Snapshots: {args.snapshot_dir}")
            else:
                LOG.warning(
                    "No available snapshot to recover from. Entire transaction history will be replayed."
                )
            network.start_in_recovery(args, args.ledger_dir, args.snapshot_dir,
                                      args.common_dir)
            network.recover(args)
        else:
            network.start_and_join(args)

        primary, backups = network.find_nodes()
        max_len = len(str(len(backups)))

        # To be sure, confirm that the app frontend is open on each node
        for node in [primary, *backups]:
            with node.client("user0") as c:
                if args.verbose:
                    r = c.get("/app/commit")
                else:
                    r = c.get("/app/commit", log_capture=[])
                assert r.status_code == http.HTTPStatus.OK, r.status_code

        def pad_node_id(nid):
            return (f"{{:{max_len}d}}").format(nid)

        LOG.info("Started CCF network with the following nodes:")
        LOG.info("  Node [{}] = https://{}:{}".format(
            pad_node_id(primary.node_id), primary.pubhost, primary.rpc_port))

        for b in backups:
            LOG.info("  Node [{}] = https://{}:{}".format(
                pad_node_id(b.node_id), b.pubhost, b.rpc_port))

        LOG.info(
            f"You can now issue business transactions to the {args.package} application."
        )
        LOG.info(
            f"Keys and certificates have been copied to the common folder: {network.common_dir}"
        )
        LOG.info(
            "See https://microsoft.github.io/CCF/master/users/issue_commands.html for more information."
        )
        LOG.warning("Press Ctrl+C to shutdown the network.")

        try:
            while True:
                time.sleep(60)

        except KeyboardInterrupt:
            LOG.info("Stopping all CCF nodes...")

    LOG.info("All CCF nodes stopped.")
Beispiel #28
0
    def from_config(environment: Environment, cfg: Config) -> 'Session':
        """Creates a new repair session according to a given configuration."""
        client_bugzoo = environment.bugzoo

        # create the patch directory
        dir_patches = cfg.dir_patches
        if os.path.exists(dir_patches):
            logger.warning("clearing existing patch directory")
            for fn in glob.glob(f'{dir_patches}/*.diff'):
                if os.path.isfile(fn):
                    os.remove(fn)

        # seed the RNG
        # FIXME use separate RNG for each session
        random.seed(cfg.seed)

        logger.info(f"using {cfg.threads} threads")
        logger.info(f"using language: {cfg.program.language.value}")
        logger.info(f"using optimizations: {cfg.optimizations}")
        logger.info(f"using coverage config: {cfg.coverage}")
        logger.info(f"using random number generator seed: {cfg.seed}")

        if not cfg.terminate_early:
            logger.info(
                "search will continue after an acceptable patch has been discovered"
            )
        else:
            logger.info(
                "search will terminate when an acceptable patch has been discovered"
            )

        if cfg.limit_time_minutes is None:
            logger.info("no time limit is being enforced")
        else:
            logger.info(f"using time limit: {cfg.limit_time_minutes} minutes")

        if cfg.limit_candidates is not None:
            logger.info(
                f"using candidate limit: {cfg.limit_candidates} candidates"
            )  # noqa
        else:
            logger.info("no limit on number of candidate evaluations")

        # check if search is unbounded
        if not cfg.limit_time and not cfg.limit_candidates:
            m = "no resource limits were specified; resource use will be unbounded"  # noqa
            logger.warning(m)

        # build program
        logger.debug("building program...")
        program = cfg.program.build(environment)
        logger.debug(f"built program: {program}")

        # compute coverage
        logger.info("computing coverage information...")
        coverage = cfg.coverage.build(environment, program)
        logger.info("computed coverage information")
        logger.debug(f"coverage: {coverage}")

        # compute localization
        logger.info("computing fault localization...")
        localization = \
            Localization.from_config(coverage, cfg.localization)
        logger.info(f"computed fault localization:\n{localization}")

        # determine implicated files and lines
        files = localization.files
        lines: List[FileLine] = list(localization)

        if program.language in (Language.CPP, Language.C):
            kaskara_project = kaskara.Project(
                dockerblade=environment.dockerblade,
                image=program.image,
                directory=program.source_directory,
                files=files)
            analyser = kaskara.clang.ClangAnalyser()
            analysis = analyser.analyse(kaskara_project)
        elif program.language == Language.PYTHON:
            kaskara_project = kaskara.Project(
                dockerblade=environment.dockerblade,
                image=program.image,
                directory=program.source_directory,
                files=files)
            analyser = kaskara.python.PythonAnalyser()
            analysis = analyser.analyse(kaskara_project)
        else:
            analysis = None

        # build problem
        problem = Problem.build(environment=environment,
                                config=cfg,
                                language=program.language,
                                program=program,
                                coverage=coverage,
                                analysis=analysis)

        logger.info("constructing database of donor snippets...")
        snippets: SnippetDatabase
        if analysis is not None:
            snippets = StatementSnippetDatabase.from_kaskara(analysis, cfg)
        else:
            snippets = LineSnippetDatabase.for_problem(problem)
        logger.info(
            f"constructed database of donor snippets: {len(snippets)} snippets"
        )  # noqa

        # FIXME build and index transformations
        # FIXME does not allow lazy construction!
        schemas: List[TransformationSchema] = []
        for schema_config in cfg.transformations.schemas:
            schemas.append(schema_config.build(problem, snippets))
        logger.info("constructing transformation database...")
        tx = list(
            build_transformations(problem,
                                  snippets,
                                  localization,
                                  schemas,
                                  eager=True))
        logger.info(
            f"constructed transformation database: {len(tx)} transformations"
        )  # noqa

        searcher = cfg.search.build(problem,
                                    transformations=tx,
                                    threads=cfg.threads,
                                    candidate_limit=cfg.limit_candidates,
                                    time_limit=cfg.limit_time)

        # build session
        return Session(dir_patches=dir_patches,
                       problem=problem,
                       searcher=searcher,
                       terminate_early=cfg.terminate_early)
Beispiel #29
0
 def first(self, stage_name: str) -> SingleClassifierResult:
     for each in self.data:
         if each.stage == stage_name:
             return each
     logger.warning(f"no stage named {stage_name} found")
Beispiel #30
0
def compute_scores(
    native_path: str,
    decoys_dir: str,
    sequence_length: int,
    voronota="voronota-cadscore",
):
    residue_index = pd.RangeIndex(sequence_length, name="residue_idx")
    decoys = []
    local_scores = []
    global_scores = []

    with tempfile.TemporaryDirectory() as tmpdir:
        decoy_paths = list(Path(decoys_dir).glob("*.pdb"))
        logger.info(f"Running CAD score on {len(decoy_paths)} decoys")
        start = time.time()
        for decoy_path in decoy_paths:
            logger.debug(decoy_path)
            decoy_name = decoy_path.with_suffix("").name
            cad_scores_path = Path(tmpdir) / decoy_name
            try:
                result = subprocess.run(
                    [
                        voronota,
                        "--input-target",
                        Path(native_path).expanduser().resolve().as_posix(),
                        "--input-model",
                        decoy_path.expanduser().resolve().as_posix(),
                        "--output-residue-scores",
                        cad_scores_path.expanduser().resolve().as_posix(),
                        "--cache-dir",
                        tmpdir,
                        "--contacts-query-by-code",
                        "AS",
                    ],
                    capture_output=True,
                    check=True,
                    timeout=TIMEOUT_SEC,
                )
            except subprocess.TimeoutExpired as e:
                try:
                    msg = e.stderr.decode()
                except Exception:
                    msg = "<no stderr>"
                logger.warning(f"Timed out {decoy_path}: {msg}")
                continue
            except subprocess.CalledProcessError as e:
                try:
                    msg = e.stderr.decode()
                except Exception:
                    msg = "<no stderr>"
                logger.warning(f"Exit code {e.returncode} {decoy_path}: {msg}")
                continue

            try:
                # Parse local scores from output file
                df = pd.read_csv(cad_scores_path,
                                 delimiter=" ",
                                 names=["residue_str", "local_cad"])
                df.index = (df["residue_str"].str.extract(
                    "r<(\d+)>", expand=False).astype(int).values - 1)
                local_cad = (df["local_cad"].reindex(residue_index,
                                                     fill_value=np.nan).values)
                # Parse global score from stdout
                global_score = float(result.stdout.decode().split()[4])
            except FileNotFoundError:
                logger.warning(
                    "CAD score did not produce local residue output")
            except Exception as e:
                logger.warning(f"Error while parsing output: {e}")
            else:
                decoys.append(decoy_name)
                local_scores.append(local_cad)
                global_scores.append(global_score)

    logger.info(
        f"Done {len(decoys)} out of {len(decoy_paths)} in {time.time() - start:.1f} seconds"
    )

    if len(decoys) == 0 or len(local_scores) == 0 or len(global_scores) == 0:
        raise CadScoreError(
            f"No decoy was successfully evaluated for {native_path}")

    return {
        "decoys": np.array(decoys),
        "local_cad": np.stack(local_scores, axis=0),
        "global_cad": np.array(global_scores),
    }
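
A hypothetical invocation of compute_scores; the paths and sequence length below are made up:

from loguru import logger

scores = compute_scores(
    native_path="targets/T1024.pdb",
    decoys_dir="decoys/T1024/",
    sequence_length=120,
)
# local_cad has shape (n_successful_decoys, 120), padded with NaN where
# voronota-cadscore produced no residue-level value
logger.info(f'{scores["decoys"].shape} {scores["local_cad"].shape} {scores["global_cad"].shape}')
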
Beispiel #31
0
    args = parser.parse_args()

    if args.u is None:
        args.u = input("RIT Username: "******"RIT Password: "******"Enter download directory: ")
        if not args.d:
            args.d = os.path.join(os.getcwd(),"MyCoursesDownloaderOutput")

    workingDirectory = os.path.join(os.getcwd(), args.d)

    if not os.path.exists(workingDirectory):
        logger.warning("Directory does not exist. Creating")
        mkdir_recursive(workingDirectory)

    URLS = []  # [("22222", "PLOS.140"), ("11111", "NSSA.220")]

    #
    # Start the Session
    #
    with halo.Halo(text="Logging in to MyCourses", spinner="dots") as progress:

        session = requests.Session()
        # Log in. Now with Shibboleth support!
        r = session.get(
            D2L_BASEURL + '/Shibboleth.sso/Login?entityID=https://shibboleth.main.ad.rit.edu/idp/shibboleth&target=https%3A%2F%2Fmycourses.rit.edu%2Fd2l%2FshibbolethSSO%2Flogin.d2l',
            allow_redirects=True)
Beispiel #32
0
# imports
from loguru import logger

# logger.add('a.txt', format='{time} {level} {message}', level='WARNING')
logger.add('a.txt',
           format='{time:YYYY-MM-DD at HH:mm:ss} {level} {message}',
           level='WARNING')

logger.info("This is an INFO-level log message")
logger.debug("This is a DEBUG-level log message")
logger.warning("This is a WARNING-level log message")
logger.success("This is a SUCCESS-level log message")
logger.error("This is an ERROR-level log message")

# log message formatting
logger.info("This is a {} log message", 'error')