def set_extension(self, strength, units, quantity, dosageform):
        extension = "quantity: %s, unit: %s" % (quantity, units)
        if strength and strength != "-":
            extension = "quantity: %s, unit: %s, strength: %s" % (
                quantity, units, strength)
            if dosageform[:6].lower() in [
                    "tablet", "capsul"
            ] or dosageform.lower() in [
                    "cap", "tab", "เม็ด", "ยาเม็ด", "แคปซูล"  # Thai: pill, tablet, capsule
            ]:
                if strength.isdecimal():
                    extension = "%s %s (quantity: %s, unit: %s, strength: %s %s)" % (
                        utils.format_number(
                            decimal.Decimal(strength) * quantity), units,
                        quantity, units, strength, units)
                else:
                    drug_quantity_unit = strength.split()
                    if len(drug_quantity_unit) == 2:
                        (inner_qty, inner_unit) = drug_quantity_unit
                        if inner_qty.isdecimal():
                            extension = "%s %s (quantity: %s, unit: %s, strength: %s)" % (
                                utils.format_number(
                                    decimal.Decimal(inner_qty) * quantity),
                                inner_unit, quantity, units, strength)

        if units is None:
            extension = extension.replace('unit: None, ', '')

        self.resource['extension'][0]['valueString'] = extension
        return extension
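# Hedged illustration of the tablet/capsule branch above: the headline dose is
# the per-unit strength times the quantity. The local format_number below is a
# stand-in assumption for the project's utils.format_number.
import decimal

def format_number(d):
    return format(d.normalize(), 'f')

strength, quantity, units = "500", 30, "mg"
total = format_number(decimal.Decimal(strength) * quantity)
print("%s %s (quantity: %s, unit: %s, strength: %s %s)"
      % (total, units, quantity, units, strength, units))
# -> 15000 mg (quantity: 30, unit: mg, strength: 500 mg)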
def update(val):
    # read values from the sliders
    Qw = slide_Qw.val
    H = hydro.get_backwater_dBdx(eta, S, B, H0, Cf, Qw, nx, dx)
    Xs = hydro.find_backwaterregion(H, dx)

    # update the artists in the window
    water_line.set_ydata(eta + H)
    water_shade.set_xy(utils.format_polyvects(x / 1000, x / 1000, eta,
                                              eta + H))
    Qw_val.set_text("Qw = " + utils.format_number(Qw))
    Bw_val.set_text("backwater from \n" + "RK " + str(int(L * mou / 1000 - Xs[0] / 1000)) +
                    " to " + str(int(L * mou / 1000 - Xs[1] / 1000)))
    Bw_val.set_x(((Xs[1] - Xs[0]) / 2 + Xs[0]) / 1000)
    Bw_brack.set_xdata(np.array([Xs[0], Xs[0], Xs[1], Xs[1]]) / 1000)
    for tab_row in np.arange(1, np.size(tabData, 0) + 1):
        vect_idx = tab_row - 1
        H_val = H[RKidxs[vect_idx]]
        overTable._cells[(tab_row,
                          0)]._text.set_text(utils.format_table_number(H_val))
        stage_val = H[RKidxs[vect_idx]] + eta[RKidxs[vect_idx]]
        overTable._cells[(tab_row, 1)]._text.set_text(
            utils.format_table_number(stage_val))
        over_val = H[RKidxs[vect_idx]] + eta[RKidxs[vect_idx]] > eta[
            RKidxs[vect_idx]] + zed[RKidxs[vect_idx]]
        overTable._cells[(tab_row, 2)]._text.set_text(str(over_val))
        overTable._cells[(tab_row, 2)]._text.set_color(
            utils.format_table_color(over_val))

    # redraw the canvas
    fig.canvas.draw_idle()
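# Hedged wiring sketch for the callback above: in the surrounding script,
# slide_Qw is a matplotlib Slider; the axes position and value range used
# here are illustrative assumptions.
from matplotlib.widgets import Slider

ax_qw = fig.add_axes([0.25, 0.02, 0.5, 0.03])
slide_Qw = Slider(ax_qw, 'Qw', valmin=1e3, valmax=6e4, valinit=1e4)
slide_Qw.on_changed(update)  # re-runs the backwater solver on every drag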
Example #3
def create_var_stats(data,
                     var_list,
                     type,
                     test_func,
                     out_prefix,
                     force_unique=True,
                     plot_dunn=True):
    out_csv = config.OUT_EVALS_DIR + "/" + out_prefix
    out_png = config.OUT_PLOT_DIR + "/" + out_prefix

    res = {}
    stat = ['s', 'p']
    res_posthoc = None
    plot_list = []
    statistics_dct = {}
    statistics_idx_col = "var"
    statistics_dct[statistics_idx_col] = []
    statistics_dct['n'] = []
    column_names = [statistics_idx_col, 'n'] + stat

    for var in var_list:
        res[var] = {}
        dct = data.to_single_type_dict(type, var, force_unique)
        lst = data.to_single_type_list(type, var, force_unique)

        # statistic test
        res[var][stat[0]], res[var][stat[1]] = test_func(
            *lst)  # '*' unpacks the list into separate arguments
        if 'n' not in res[var]:
            res[var]['n'] = len(lst[0])

        statistics_dct[statistics_idx_col].append(var)
        statistics_dct['n'].append(len(lst[0]))
        for st in stat:
            if st not in statistics_dct:
                statistics_dct[st] = []
            val = utils.format_number(res[var][st], config.PRINT_PRECISION)
            statistics_dct[st].append(val)

        # dunn
        if plot_dunn:
            frame = pd.DataFrame.from_dict(dct)
            frame = frame.melt(var_name='groups', value_name='values')
            res_posthoc = sp.posthoc_dunn(frame,
                                          val_col='values',
                                          group_col='groups',
                                          p_adjust='bonferroni')
            path = out_png + "heat_" + var + "_" + sp.posthoc_dunn.__name__ + "." + config.OUT_PNG_EXT
            plot_list.append(plots.saveHeatMapPlot(res_posthoc, path))

    # statistics to df
    statistics_df = pd.DataFrame(statistics_dct, columns=column_names)
    statistics_df.set_index(statistics_idx_col)
    out_var_path = out_csv + "question_groups_" + type.name + "_" + test_func.__name__ + "." + config.OUT_CSV_EXT
    statistics_df.to_csv(out_var_path, index=False)

    return res, plot_list
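# Hedged usage sketch: any test_func returning a (statistic, p-value) pair
# fits, e.g. scipy's Kruskal-Wallis test. The data object, variable names,
# and config enum below are assumptions from the surrounding project.
from scipy import stats

res, heatmaps = create_var_stats(data, ['accuracy', 'duration'],
                                 config.CalcByType.TOOL, stats.kruskal,
                                 out_prefix='tools_', plot_dunn=False)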
Example #4
def textprogress(display, current, total):
    """Show download progress in the terminal."""
    percentage = current / float(total) * 100

    sys.stdout.write("\r%-56.56s %3i%% [%5sB / %5sB]" %
                     (display,
                      percentage,
                      format_number(current),
                      format_number(total)))

    if percentage == 100:
        sys.stdout.write("\n")

    # This makes sure the cursor ends up on the far right.
    # Without this the cursor constantly jumps around.
    sys.stdout.flush()
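# Hedged usage sketch: drive textprogress from a fake download loop
# (format_number is assumed to be the surrounding module's byte formatter).
import time

total = 4 * 1024 * 1024
for done in range(0, total + 1, total // 8):
    textprogress("example.tar.gz", done, total)
    time.sleep(0.05)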
    def set_row_data(self):
        _row_data = []
        _list_items = []

        for idx, country in enumerate(self._countries_data):
            _row_data.append((idx + 1, country['country_name'],
                              utils.format_number(country['cases'])))
            _list_items.append(country['country_name'])

        return _row_data[::-1], _list_items
    def on_select_country(self, id):

        _select_button = self.root.ids['home_screen'].ids['select_btn']

        _cases_inc = self.root.ids['home_screen'].ids['cases_inc']
        _cases_tot = self.root.ids['home_screen'].ids['cases_tot']
        _recovered_inc = self.root.ids['home_screen'].ids['recovered_inc']
        _recovered_tot = self.root.ids['home_screen'].ids['recovered_tot']
        _deaths_inc = self.root.ids['home_screen'].ids['deaths_inc']
        _deaths_tot = self.root.ids['home_screen'].ids['deaths_tot']

        _cases_tot.text = "+" + utils.format_number(
            self._countries_data[id]['cases']) + ' Total'
        _cases_inc.text = "+" + utils.format_number(
            self._countries_data[id]['today_cases'])
        _recovered_tot.text = "+" + utils.format_number(
            self._countries_data[id]['recovered']) + ' Total'
        _recovered_inc.text = "+" + utils.format_number(
            self._countries_data[id]['today_recovered'])
        _deaths_tot.text = '+' + utils.format_number(
            self._countries_data[id]['deaths']) + ' Total'
        _deaths_inc.text = '+' + utils.format_number(
            self._countries_data[id]['today_deaths'])

        _select_button.text = self._countries_data[id]['country_name']

        self.country_dialog.dismiss()
Example #7
    def on_get_changes_size(self):

        # Build list of packages to be downloaded
        packages = [(value["Package"], value["Version"])
                    for key, value in self.status.items()
                    if value["Status"] in ["to be downloaded", "dependency to be downloaded"]]

        count = 0
        total = 0
        for name, version in packages:
            package = self.get_binary_version(name, version)
            if package:
                total += int(package["Size"])
                count += 1

        return (count, format_number(total), total)
Example #8
    def fit_step(self, dsf, batch_size, verbose=0):
        # Read the batch data to fit
        batch, index_batch, times_batch = dsf.get_batch(self.x_col, self.y_col, size=batch_size)
        x, y = batch
        with tf.GradientTape() as t:
            # Compute the model posterior
            particles_global, log_p, variables_sampler = self.sampler_global.sample(batch_size)
            states, theta, llkl, p0, log_jacobian, variables_model = self.model.model_evaluation(
                y, particles=particles_global, particles_t=x)
            p1 = llkl + p0 - log_p
            elbo = tf.reduce_mean(p1)
            _elbo = format_number(elbo.numpy())
            if verbose > 0:
                print(f'elbo: {_elbo}')
            variables = variables_sampler + variables_model
            g = t.gradient(-elbo, variables)
            # g = [tf.clip_by_value(x, -1e4, 1e4) for x in g]
        # optimize
        self.inferer.optimize(g, variables)
        return _elbo
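# Hedged driver sketch for fit_step: `inferer` stands for an instance of the
# (unnamed) class above; dsf and the step count are illustrative assumptions.
n_steps = 1000
for step in range(n_steps):
    elbo = inferer.fit_step(dsf, batch_size=64, verbose=1 if step % 100 == 0 else 0)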
Example #9
    def fit_step(self, dsf, batch_size, verbose=0):
        # Read the batch data to fit
        batch, index_batch, times_batch = dsf.get_batch(
            [self.x_col], self.y_col, self.states0_col, size=batch_size)
        x_t, y_t, states0 = batch
        with tf.GradientTape() as t:
            # Compute the model posterior
            particles_global, log_p_global, variables_global = self.sampler_global.sample()
            particles_t, log_p, variables_shaper = self.shaper_local(*x_t, particles_global=particles_global)
            states, theta, llkl, p0, log_jacobian, variables_model = self.model.model_evaluation(
                y_t, particles_t, particles_global, **states0)
            p1 = llkl - log_p - log_p_global
            elbo = tf.reduce_mean(p1)
            _elbo = format_number(elbo.numpy())
            if verbose > 0:
                print(f'elbo: {_elbo}, llkl: {np.mean(llkl.numpy())}, log_p: {np.mean(log_p.numpy())}')
            variables = variables_shaper + variables_model + variables_global
            g = t.gradient(-elbo, variables)
        # optimize
        self.inferer.optimize(g, variables)
        return _elbo
    def on_start(self):

        self.root.ids['home_screen'].ids[
            'cases_tot'].text = "+" + utils.format_number(
                self._countries_data[0]['cases']) + ' Total'
        self.root.ids['home_screen'].ids[
            'cases_inc'].text = "+" + utils.format_number(
                self._countries_data[0]['today_cases'])
        self.root.ids['home_screen'].ids[
            'recovered_tot'].text = "+" + utils.format_number(
                self._countries_data[0]['recovered']) + ' Total'
        self.root.ids['home_screen'].ids[
            'recovered_inc'].text = "+" + utils.format_number(
                self._countries_data[0]['today_recovered'])
        self.root.ids['home_screen'].ids[
            'deaths_tot'].text = '+' + utils.format_number(
                self._countries_data[0]['deaths']) + ' Total'
        self.root.ids['home_screen'].ids[
            'deaths_inc'].text = '+' + utils.format_number(
                self._countries_data[0]['today_deaths'])

        self.create_graph()
Example #11
    def forward(self,  # type: ignore
                input_ids: torch.LongTensor,
                input_mask: torch.LongTensor,
                input_segments: torch.LongTensor,
                passage_mask: torch.LongTensor,
                question_mask: torch.LongTensor,
                number_indices: torch.LongTensor,
                passage_number_order: torch.LongTensor,
                question_number_order: torch.LongTensor,
                question_number_indices: torch.LongTensor,
                gnodes: torch.LongTensor,
                gnodes_len: torch.LongTensor,
                gnodes_mask: torch.LongTensor,
                gedges: torch.LongTensor,
                gedge_types: int,
                answer_as_passage_spans: torch.LongTensor = None,
                answer_as_question_spans: torch.LongTensor = None,
                answer_as_add_sub_expressions: torch.LongTensor = None,
                answer_as_counts: torch.LongTensor = None,
                answer_as_text_to_disjoint_bios: torch.LongTensor = None,
                answer_as_list_of_bios: torch.LongTensor = None,
                span_bio_labels: torch.LongTensor = None,
                bio_wordpiece_mask: torch.LongTensor = None,
                is_bio_mask: torch.LongTensor = None,
                metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:

        outputs = self.bert(input_ids, attention_mask=input_mask, token_type_ids=input_segments)
        sequence_output = outputs[0]
        sequence_output_list = [item for item in outputs[2][-4:]]

        batch_size = input_ids.size(0)
        if ("passage_span_extraction" in self.answering_abilities or "question_span" in self.answering_abilities) and self.use_gcn:
            # M2, M3
            sequence_alg = self._gcn_input_proj(torch.cat([sequence_output_list[2], sequence_output_list[3]], dim=2))
            encoded_passage_for_numbers = sequence_alg
            encoded_question_for_numbers = sequence_alg
            encoded_dates = sequence_alg
            encoded_passage = sequence_alg

            # gather raw node embeddings from the encoded passage
            real_gnodes_indices = gnodes_mask - 1
            real_gnodes_mask = gnodes_mask > 0
            real_gnodes_indices_col_len = real_gnodes_indices.size(-1)
            z0 = real_gnodes_indices.view(batch_size, -1)
            # gnodes indices are always > 0, since passage_start exists
            z1 = (z0 > -9999).nonzero()[:, 0].view(batch_size, -1)
            gnodes_embs = encoded_passage[z1, z0]
            gnodes_embs = gnodes_embs.view(batch_size, -1, real_gnodes_indices_col_len, encoded_passage.size(-1))
            gnodes_token_ids = real_gnodes_indices

            real_gnodes_mask = real_gnodes_mask.unsqueeze(-1).expand(-1, -1, -1, encoded_passage.size(-1))
            gnodes_embs = util.replace_masked_values(gnodes_embs, real_gnodes_mask, 0)
            sum_gnodes_embs = gnodes_embs.sum(-2)
            mean_gnodes_embs = sum_gnodes_embs / gnodes_len.unsqueeze(-1).float()
            gnodes_mask_bin = (gnodes_len > 0).long()
            cls_emb = sequence_output[:, 0]
            new_gnodes_embs = self.qdgat(mean_gnodes_embs, gnodes_mask_bin, cls_emb, gedges, gedge_types, is_print=False)


            gnodes_embs_updated = util.replace_masked_values(
                new_gnodes_embs.unsqueeze(-2).expand(-1, -1, real_gnodes_indices_col_len, -1), real_gnodes_mask, 0)
            gnodes_embs_updated = gnodes_embs_updated.view(batch_size, -1, sequence_alg.size(-1))

            gcn_info_vec = torch.zeros(encoded_passage.shape, dtype=torch.float, device=gnodes.device)

            gnodes_updated_index = util.replace_masked_values(z0, z0>0, 0)
            gcn_info_vec.scatter_(1, gnodes_updated_index.unsqueeze(-1).expand(-1, -1, gnodes_embs_updated.size(-1)), gnodes_embs_updated)

            sequence_output_list[2] = self._gcn_enc(self._proj_ln(sequence_output_list[2] + gcn_info_vec))
            sequence_output_list[0] = self._gcn_enc(self._proj_ln0(sequence_output_list[0] + gcn_info_vec))
            sequence_output_list[1] = self._gcn_enc(self._proj_ln1(sequence_output_list[1] + gcn_info_vec))
            sequence_output_list[3] = self._gcn_enc(self._proj_ln3(sequence_output_list[3] + gcn_info_vec))


        # passage hidden and question hidden
        sequence_h2_weight = self._proj_sequence_h(sequence_output_list[2]).squeeze(-1)
        passage_h2_weight = util.masked_softmax(sequence_h2_weight, passage_mask)
        passage_h2 = util.weighted_sum(sequence_output_list[2], passage_h2_weight)
        question_h2_weight = util.masked_softmax(sequence_h2_weight, question_mask)
        question_h2 = util.weighted_sum(sequence_output_list[2], question_h2_weight)

        # passage g0, g1, g2
        question_g0_weight = self._proj_sequence_g0(sequence_output_list[0]).squeeze(-1)
        question_g0_weight = util.masked_softmax(question_g0_weight, question_mask)
        question_g0 = util.weighted_sum(sequence_output_list[0], question_g0_weight)

        question_g1_weight = self._proj_sequence_g1(sequence_output_list[1]).squeeze(-1)
        question_g1_weight = util.masked_softmax(question_g1_weight, question_mask)
        question_g1 = util.weighted_sum(sequence_output_list[1], question_g1_weight)

        question_g2_weight = self._proj_sequence_g2(sequence_output_list[2]).squeeze(-1)
        question_g2_weight = util.masked_softmax(question_g2_weight, question_mask)
        question_g2 = util.weighted_sum(sequence_output_list[2], question_g2_weight)


        if len(self.answering_abilities) > 1:
            # Shape: (batch_size, number_of_abilities)
            answer_ability_logits = self._answer_ability_predictor(torch.cat([passage_h2, question_h2, sequence_output[:, 0]], 1))
            answer_ability_log_probs = F.log_softmax(answer_ability_logits, -1)
            best_answer_ability = torch.argmax(answer_ability_log_probs, 1)
            top_two_answer_abilities = torch.topk(answer_ability_log_probs, k=2, dim=1)

        real_number_indices = number_indices.squeeze(-1) - 1
        number_mask = (real_number_indices > -1).long()
        clamped_number_indices = util.replace_masked_values(real_number_indices, number_mask, 0)
        encoded_passage_for_numbers = torch.cat([sequence_output_list[2], sequence_output_list[3]], dim=-1)

        encoded_numbers = torch.gather(encoded_passage_for_numbers, 1,
            clamped_number_indices.unsqueeze(-1).expand(-1, -1, encoded_passage_for_numbers.size(-1)))
        number_weight = self._proj_number(encoded_numbers).squeeze(-1)
        number_mask = (number_indices > -1).long()
        number_weight = util.masked_softmax(number_weight, number_mask)
        number_vector = util.weighted_sum(encoded_numbers, number_weight)



        if "counting" in self.answering_abilities:
            # Shape: (batch_size, 10)
            count_number_logits = self._count_number_predictor(torch.cat([number_vector, passage_h2, question_h2, sequence_output[:, 0]], dim=1))
            count_number_log_probs = torch.nn.functional.log_softmax(count_number_logits, -1)
            # Info about the best count number prediction
            # Shape: (batch_size,)
            best_count_number = torch.argmax(count_number_log_probs, -1)
            best_count_log_prob = torch.gather(count_number_log_probs, 1, best_count_number.unsqueeze(-1)).squeeze(-1)
            if len(self.answering_abilities) > 1:
                best_count_log_prob += answer_ability_log_probs[:, self._counting_index]

        if "passage_span_extraction" in self.answering_abilities or "question_span_extraction" in self.answering_abilities:
            # start 0, 2
            sequence_for_span_start = torch.cat([sequence_output_list[2],
                                                 sequence_output_list[0],
                                                 sequence_output_list[2]*question_g2.unsqueeze(1),
                                                 sequence_output_list[0]*question_g0.unsqueeze(1)],
                                     dim=2)
            sequence_span_start_logits = self._span_start_predictor(sequence_for_span_start).squeeze(-1)
            # Shape: (batch_size, passage_length, modeling_dim * 2)
            sequence_for_span_end = torch.cat([sequence_output_list[2],
                                               sequence_output_list[1],
                                               sequence_output_list[2]*question_g2.unsqueeze(1),
                                               sequence_output_list[1]*question_g1.unsqueeze(1)],
                                            dim=2)
            # Shape: (batch_size, passage_length)
            sequence_span_end_logits = self._span_end_predictor(sequence_for_span_end).squeeze(-1)
            # Shape: (batch_size, passage_length)

            if "passage_span_extraction" in self.answering_abilities:
                passage_span_start_log_probs = util.masked_log_softmax(sequence_span_start_logits, passage_mask)
                passage_span_end_log_probs = util.masked_log_softmax(sequence_span_end_logits, passage_mask)

                # Info about the best passage span prediction
                passage_span_start_logits = util.replace_masked_values(sequence_span_start_logits, passage_mask, -1e7)
                passage_span_end_logits = util.replace_masked_values(sequence_span_end_logits, passage_mask, -1e7)
                # Shape: (batch_size, topk, 2)
                best_passage_span = util.get_best_span(passage_span_start_logits, passage_span_end_logits)

            if "question_span_extraction" in self.answering_abilities:
                question_span_start_log_probs = util.masked_log_softmax(sequence_span_start_logits, question_mask)
                question_span_end_log_probs = util.masked_log_softmax(sequence_span_end_logits, question_mask)

                # Info about the best question span prediction
                question_span_start_logits = util.replace_masked_values(sequence_span_start_logits, question_mask, -1e7)
                question_span_end_logits = util.replace_masked_values(sequence_span_end_logits, question_mask, -1e7)
                # Shape: (batch_size, topk, 2)
                best_question_span = util.get_best_span(question_span_start_logits, question_span_end_logits)


        if "addition_subtraction" in self.answering_abilities:
            alg_encoded_numbers = torch.cat(
                [encoded_numbers,
                 question_h2.unsqueeze(1).repeat(1, encoded_numbers.size(1), 1),
                 passage_h2.unsqueeze(1).repeat(1, encoded_numbers.size(1), 1),
                 sequence_output[:, 0].unsqueeze(1).repeat(1, encoded_numbers.size(1), 1)
                 ], 2)

            # Shape: (batch_size, # of numbers in the passage, 3)
            number_sign_logits = self._number_sign_predictor(alg_encoded_numbers)
            number_sign_log_probs = torch.nn.functional.log_softmax(number_sign_logits, -1)

            # Shape: (batch_size, # of numbers in passage).
            best_signs_for_numbers = torch.argmax(number_sign_log_probs, -1)
            # For padding numbers, the best sign is masked to 0 (not included).
            best_signs_for_numbers = util.replace_masked_values(best_signs_for_numbers, number_mask, 0)
            # Shape: (batch_size, # of numbers in passage)
            best_signs_log_probs = torch.gather(number_sign_log_probs, 2, best_signs_for_numbers.unsqueeze(-1)).squeeze(
                -1)
            # the probability at masked positions should be 1 (log-prob 0) so it does not affect the joint probability
            # TODO: this is not quite right, since if there are many numbers in the passage,
            # TODO: the joint probability would be very small.
            best_signs_log_probs = util.replace_masked_values(best_signs_log_probs, number_mask, 0)
            # Shape: (batch_size,)
            best_combination_log_prob = best_signs_log_probs.sum(-1)
            if len(self.answering_abilities) > 1:
                best_combination_log_prob += answer_ability_log_probs[:, self._addition_subtraction_index]

        # add multiple span prediction
        if bio_wordpiece_mask is None or not self.multispan_use_bio_wordpiece_mask:
            multispan_mask = input_mask
        else:
            multispan_mask = input_mask * bio_wordpiece_mask
        if "multiple_spans" in self.answering_abilities:
            if self.multispan_head_name == "flexible_loss":
                multispan_log_probs, multispan_logits = self._multispan_module(sequence_output, seq_mask=multispan_mask)
            else:
                multispan_log_probs, multispan_logits = self._multispan_module(sequence_output)

        output_dict = {}

        # If answer is given, compute the loss.
        if answer_as_passage_spans is not None or answer_as_question_spans is not None or answer_as_add_sub_expressions is not None or answer_as_counts is not None:

            log_marginal_likelihood_list = []

            for answering_ability in self.answering_abilities:
                if answering_ability == "passage_span_extraction":
                    # Shape: (batch_size, # of answer spans)
                    gold_passage_span_starts = answer_as_passage_spans[:, :, 0]
                    gold_passage_span_ends = answer_as_passage_spans[:, :, 1]
                    # Some spans are padded with index -1,
                    # so we clamp those paddings to 0 and then mask after `torch.gather()`.
                    gold_passage_span_mask = (gold_passage_span_starts != -1).long()
                    clamped_gold_passage_span_starts = util.replace_masked_values(gold_passage_span_starts,
                                                                                  gold_passage_span_mask, 0)
                    clamped_gold_passage_span_ends = util.replace_masked_values(gold_passage_span_ends,
                                                                                gold_passage_span_mask, 0)
                    # Shape: (batch_size, # of answer spans)
                    log_likelihood_for_passage_span_starts = torch.gather(passage_span_start_log_probs, 1,
                                                                          clamped_gold_passage_span_starts)
                    log_likelihood_for_passage_span_ends = torch.gather(passage_span_end_log_probs, 1,
                                                                        clamped_gold_passage_span_ends)
                    # Shape: (batch_size, # of answer spans)
                    log_likelihood_for_passage_spans = log_likelihood_for_passage_span_starts + log_likelihood_for_passage_span_ends
                    # For those padded spans, we set their log probabilities to be very small negative value
                    log_likelihood_for_passage_spans = util.replace_masked_values(log_likelihood_for_passage_spans,
                                                                                  gold_passage_span_mask, -1e7)
                    # Shape: (batch_size, )
                    log_marginal_likelihood_for_passage_span = util.logsumexp(log_likelihood_for_passage_spans)

#                    print('passage_span_extraction: ', log_marginal_likelihood_for_passage_span)
                    log_marginal_likelihood_list.append(log_marginal_likelihood_for_passage_span)

                elif answering_ability == "question_span_extraction":
                    # Shape: (batch_size, # of answer spans)
                    gold_question_span_starts = answer_as_question_spans[:, :, 0]
                    gold_question_span_ends = answer_as_question_spans[:, :, 1]
                    # Some spans are padded with index -1,
                    # so we clamp those paddings to 0 and then mask after `torch.gather()`.
                    gold_question_span_mask = (gold_question_span_starts != -1).long()
                    clamped_gold_question_span_starts = util.replace_masked_values(gold_question_span_starts,
                                                                                   gold_question_span_mask, 0)
                    clamped_gold_question_span_ends = util.replace_masked_values(gold_question_span_ends,
                                                                                 gold_question_span_mask, 0)
                    # Shape: (batch_size, # of answer spans)
                    log_likelihood_for_question_span_starts = torch.gather(question_span_start_log_probs, 1,
                                                                           clamped_gold_question_span_starts)
                    log_likelihood_for_question_span_ends = torch.gather(question_span_end_log_probs, 1,
                                                                         clamped_gold_question_span_ends)
                    # Shape: (batch_size, # of answer spans)
                    log_likelihood_for_question_spans = log_likelihood_for_question_span_starts + log_likelihood_for_question_span_ends
                    # For those padded spans, we set their log probabilities to be very small negative value
                    log_likelihood_for_question_spans = util.replace_masked_values(log_likelihood_for_question_spans,
                                                                                   gold_question_span_mask, -1e7)
                    # Shape: (batch_size, )
                    # pylint: disable=invalid-name
                    log_marginal_likelihood_for_question_span = util.logsumexp(log_likelihood_for_question_spans)

                    # question multi span prediction
                    log_marginal_likelihood_list.append(log_marginal_likelihood_for_question_span)
#                    print('log_marginal_likelihood_for_question_span: ', log_marginal_likelihood_for_question_span)

                elif answering_ability == "addition_subtraction":
                    # The padded add-sub combinations use 0 as the signs for all numbers, and we mask them here.
                    # Shape: (batch_size, # of combinations)
                    gold_add_sub_mask = (answer_as_add_sub_expressions.sum(-1) > 0).float()
                    # Shape: (batch_size, # of numbers in the passage, # of combinations)
                    gold_add_sub_signs = answer_as_add_sub_expressions.transpose(1, 2)
                    # Shape: (batch_size, # of numbers in the passage, # of combinations)
                    log_likelihood_for_number_signs = torch.gather(number_sign_log_probs, 2, gold_add_sub_signs)
                    # the log likelihood of the masked positions should be 0
                    # so that it will not affect the joint probability
                    log_likelihood_for_number_signs = util.replace_masked_values(log_likelihood_for_number_signs,
                                                                                 number_mask.unsqueeze(-1), 0)
                    # Shape: (batch_size, # of combinations)
                    log_likelihood_for_add_subs = log_likelihood_for_number_signs.sum(1)
                    # For those padded combinations, we set their log probabilities to be very small negative value
                    log_likelihood_for_add_subs = util.replace_masked_values(log_likelihood_for_add_subs,
                                                                             gold_add_sub_mask, -1e7)
                    # Shape: (batch_size, )
                    log_marginal_likelihood_for_add_sub = util.logsumexp(log_likelihood_for_add_subs)
                    log_marginal_likelihood_list.append(log_marginal_likelihood_for_add_sub)
#                    print('log_marginal_likelihood_for_add_sub: ', log_marginal_likelihood_for_add_sub)

                elif answering_ability == "counting":
                    # Count answers are padded with label -1,
                    # so we clamp those paddings to 0 and then mask after `torch.gather()`.
                    # Shape: (batch_size, # of count answers)
                    gold_count_mask = (answer_as_counts != -1).long()
                    # Shape: (batch_size, # of count answers)
                    clamped_gold_counts = util.replace_masked_values(answer_as_counts, gold_count_mask, 0)
                    log_likelihood_for_counts = torch.gather(count_number_log_probs, 1, clamped_gold_counts)
                    # For those padded spans, we set their log probabilities to be very small negative value
                    log_likelihood_for_counts = util.replace_masked_values(log_likelihood_for_counts, gold_count_mask,
                                                                           -1e7)
                    # Shape: (batch_size, )
                    log_marginal_likelihood_for_count = util.logsumexp(log_likelihood_for_counts)
                    log_marginal_likelihood_list.append(log_marginal_likelihood_for_count)
#                    print('log_marginal_likelihood_for_count: ', log_marginal_likelihood_for_count)
                elif answering_ability == "multiple_spans":
                    if self.multispan_head_name == "flexible_loss":
                        log_marginal_likelihood_for_multispan = \
                            self._multispan_log_likelihood(answer_as_text_to_disjoint_bios,
                                                           answer_as_list_of_bios,
                                                           span_bio_labels,
                                                           multispan_log_probs,
                                                           multispan_logits,
                                                           multispan_mask,
                                                           bio_wordpiece_mask,
                                                           is_bio_mask)
                    else:
                        log_marginal_likelihood_for_multispan = \
                            self._multispan_log_likelihood(span_bio_labels,
                                                           multispan_log_probs,
                                                           multispan_mask,
                                                           is_bio_mask,
                                                           logits=multispan_logits)
                    log_marginal_likelihood_list.append(log_marginal_likelihood_for_multispan)

                else:
                    raise ValueError(f"Unsupported answering ability: {answering_ability}")
            if len(self.answering_abilities) > 1:
                # Add the ability probabilities if there are more than one abilities
                all_log_marginal_likelihoods = torch.stack(log_marginal_likelihood_list, dim=-1)
                all_log_marginal_likelihoods = all_log_marginal_likelihoods + answer_ability_log_probs
                marginal_log_likelihood = util.logsumexp(all_log_marginal_likelihoods)
            else:
                marginal_log_likelihood = log_marginal_likelihood_list[0]
            output_dict["loss"] = - marginal_log_likelihood.mean()
#            print('batch_loss: ', output_dict["loss"])

        with torch.no_grad():
            best_answer_ability = best_answer_ability.detach().cpu().numpy()
            if metadata is not None:
                output_dict["question_id"] = []
                output_dict["answer"] = []
                i = 0
                while i < batch_size:
                    if len(self.answering_abilities) > 1:
                        answer_index = best_answer_ability[i]
                        predicted_ability_str = self.answering_abilities[answer_index]
                    else:
                        predicted_ability_str = self.answering_abilities[0]

                    answer_json: Dict[str, Any] = {}

                    question_start = 1
                    passage_start = len(metadata[i]["question_tokens"]) + 2
                    # We did not consider multi-mention answers here
                    if predicted_ability_str == "passage_span_extraction":
                        answer_json["answer_type"] = "passage_span"
                        passage_str = metadata[i]['original_passage']
                        offsets = metadata[i]['passage_token_offsets']
                        predicted_span = tuple(best_passage_span[i].detach().cpu().numpy())
                        start_offset = offsets[predicted_span[0] - passage_start][0]
                        end_offset = offsets[predicted_span[1] - passage_start][1]

                        predicted_answer = passage_str[start_offset:end_offset]
                        answer_json["value"] = predicted_answer
                        answer_json["spans"] = [(start_offset, end_offset)]

                    elif predicted_ability_str == "question_span_extraction":
                        answer_json["answer_type"] = "question_span"
                        question_str = metadata[i]['original_question']
                        offsets = metadata[i]['question_token_offsets']
                        predicted_span = tuple(best_question_span[i].detach().cpu().numpy())
                        start_offset = offsets[predicted_span[0] - question_start][0]
                        end_offset = offsets[predicted_span[1] - question_start][1]

                        predicted_answer = question_str[start_offset:end_offset]
                        answer_json["value"] = predicted_answer
                        answer_json["spans"] = [(start_offset, end_offset)]

                    elif predicted_ability_str == "addition_subtraction":  # plus_minus combination answer
                        answer_json["answer_type"] = "arithmetic"
                        original_numbers = metadata[i]['original_numbers']
                        sign_remap = {0: 0, 1: 1, 2: -1}
                        predicted_signs = [sign_remap[it] for it in best_signs_for_numbers[i].detach().cpu().numpy()]
                        result = sum([sign * number for sign, number in zip(predicted_signs, original_numbers)])
                        predicted_answer = format_number(result)

                        offsets = metadata[i]['passage_token_offsets']
                        number_indices = metadata[i]['number_indices']
                        number_positions = [offsets[index - 1] for index in number_indices]
                        answer_json['numbers'] = []
                        for offset, value, sign in zip(number_positions, original_numbers, predicted_signs):
                            answer_json['numbers'].append({'span': offset, 'value': value, 'sign': sign})
                        if number_indices[-1] == -1:
                            # There is a dummy 0 number at position -1 added in some cases; we are
                            # removing that here.
                            answer_json["numbers"].pop()
                        answer_json["value"] = result
                        answer_json['number_sign_log_probs'] = number_sign_log_probs[i, :, :].detach().cpu().numpy()
                    elif predicted_ability_str == "counting":
                        answer_json["answer_type"] = "count"
                        predicted_count = best_count_number[i].detach().cpu().numpy()
                        predicted_answer = str(predicted_count)
                        answer_json["count"] = predicted_count
                    elif predicted_ability_str == "multiple_spans":
                        passage_str = metadata[i]["original_passage"]
                        question_str = metadata[i]['original_question']
                        qp_tokens = metadata[i]["question_passage_tokens"]
                        answer_json["answer_type"] = "multiple_spans"
                        if self.multispan_head_name == "flexible_loss":
                            answer_json["value"], answer_json["spans"], invalid_spans = \
                                self._multispan_prediction(multispan_log_probs[i], multispan_logits[i], qp_tokens,
                                                           passage_str,
                                                           question_str,
                                                           multispan_mask[i], bio_wordpiece_mask[i],
                                                           self.multispan_use_prediction_beam_search and not self.training)
                        else:
                            answer_json["value"], answer_json["spans"], invalid_spans = \
                                self._multispan_prediction(multispan_log_probs[i], multispan_logits[i], qp_tokens,
                                                           passage_str,
                                                           question_str,
                                                           multispan_mask[i])
                        if self._unique_on_multispan:
                            answer_json["value"] = list(OrderedDict.fromkeys(answer_json["value"]))

                            if self._dont_add_substrings_to_ms:
                                answer_json["value"] = remove_substring_from_prediction(answer_json["value"])

                        if len(answer_json["value"]) == 0:
                            best_answer_ability[i] = top_two_answer_abilities[1][i][1]
                            continue
                        predicted_answer = answer_json["value"]
                    else:
                        raise ValueError(f"Unsupported answer ability: {predicted_ability_str}")

                    answer_json["predicted_answer"] = predicted_answer
                    output_dict["question_id"].append(metadata[i]["question_id"])
                    output_dict["answer"].append(answer_json)
                    answer_annotations = metadata[i].get('answer_annotations', [])
                    if answer_annotations:
                        self._drop_metrics(predicted_answer, answer_annotations)


                    ##################################################
                    real_answer_types = []
                    answer_json['real'] = {}
                    if answer_as_passage_spans[i][0][0].detach().cpu().numpy() >= 0:  # found passage span
                        real_answer_types.append('passage span')

                    if answer_as_question_spans[i][0][0].detach().cpu().numpy() >= 0:  # found question span
                        real_answer_types.append('question span')

                    if answer_as_add_sub_expressions[i].sum().detach().cpu().numpy() > 0:  # found expression
                        real_answer_types.append('expr')
                        expr_arr = answer_as_add_sub_expressions[i].detach().cpu().numpy()
                        sign_remap = {0: 0, 1: '', 2: '-'}
                        exprs = []
                        for j in range(min(5, len(expr_arr))):
                            parts = []
                            for k in range(len(metadata[i]['original_numbers'])):
                                if expr_arr[j][k] > 0:
                                    parts.append(str(sign_remap[expr_arr[j][k]]) + str(metadata[i]['original_numbers'][k]))
                            exprs.append('+'.join(parts).replace('+-', '-'))
                        answer_json['real']['exprs'] = ';'.join(exprs)
                    if answer_as_counts[i].detach().cpu().numpy() > 0:
                        real_answer_types.append('count')
                    answer_json['real']['answer_types'] = ','.join(real_answer_types)
                    ##################################################

                    i += 1
            return output_dict
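# Self-contained illustration of the addition_subtraction decoding above:
# per-number sign classes {0: ignore, 1: plus, 2: minus} are remapped and
# summed. The numbers and predicted signs are made up for this example.
sign_remap = {0: 0, 1: 1, 2: -1}
original_numbers = [250, 40, 3]   # numbers extracted from the passage
best_signs = [1, 2, 0]            # argmax sign class per number
result = sum(sign_remap[s] * n for s, n in zip(best_signs, original_numbers))
print(result)  # 250 - 40 = 210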
def train(model: nn.Module, optimizer: optim.Optimizer, dataloader: DataLoader, epochs: int,
          loss_criterion: str, model_dir: str, plateau_limit: int, apply_nested_dropout: bool,
          reconstruct: bool, **kwargs):
    print(f'The model has {utils.get_num_parameters(model):,} parameters')
    testloader = kwargs.pop('testloader', None)
    lr_scheduler = kwargs.pop('lr_scheduler', None)

    loss_function = getattr(nn, loss_criterion)()
    batch_print = len(dataloader) // 5

    model.train()
    device = utils.get_device()
    model.to(device)  # TODO check if this actually does anything

    losses = []
    accuracies = []
    best_loss = float('inf')
    best_accuracy = 0
    plateau = 0
    train_time = 0
    for epoch in range(epochs):
        epoch_start = time.time()
        line = f'\tEpoch {epoch + 1}/{epochs}'
        if apply_nested_dropout and epoch > 0:
            line += f' ({model.get_converged_unit()}/{model.get_dropout_dim()} converged units)'
        print(line)

        batch_losses = []
        for i, (X, y) in enumerate(dataloader):
            optimizer.zero_grad()
            X = X.to(device)
            y = y.to(device)
            prediction = model(X)

            if reconstruct:
                loss = loss_function(prediction, X)
            else:
                loss = loss_function(prediction, y)

            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())
            if (i + 1) % batch_print == 0:
                batch_loss = utils.format_number(np.average(batch_losses[-batch_print:]))
                print(f'Batch {i + 1} loss: {batch_loss}')

            if apply_nested_dropout:
                model(X)
                if model.has_converged():
                    break

        epoch_loss = utils.format_number(np.average(batch_losses))
        losses.append(epoch_loss)

        epoch_time = time.time() - epoch_start
        train_time += epoch_time

        print(f'\tEpoch loss {epoch_loss}')

        model_save_kwargs = dict(**kwargs, epoch=epoch, train_time=utils.format_time(train_time), losses=losses)
        has_improved = False
        if testloader is not None:
            model.eval()
            eval_accuracy = round(utils.get_model_accuracy(model, testloader, device), 3)
            model.train()
            accuracies.append(eval_accuracy)
            print(f'\tEvaluation accuracy {eval_accuracy}')

            if eval_accuracy > best_accuracy:
                best_accuracy = eval_accuracy
                has_improved = True
                model_save_kwargs.update(accuracies=accuracies, best_accuracy=best_accuracy)

            if lr_scheduler is not None:
                lr_scheduler.step(eval_accuracy)

        elif epoch_loss < best_loss:
            best_loss = epoch_loss
            has_improved = True
            model_save_kwargs.update(best_loss=best_loss)

        print(f'\tEpoch time {utils.format_time(epoch_time)}\n')
        if has_improved:
            utils.save_model(model, optimizer, f'{model_dir}/model', **model_save_kwargs)
            plateau = 0
        else:
            plateau += 1

        if (plateau == plateau_limit) or (apply_nested_dropout is True and model.has_converged()):
            break

    if apply_nested_dropout is True and model.has_converged():
        end = 'nested dropout has converged'
        print('Nested dropout has converged!')
    elif plateau == plateau_limit:
        end = 'has plateaued'
        print('The model has plateaued...')
    else:
        end = f'reached max number of epochs ({epochs})'
        print('The maximum number of epochs has been reached...')
    utils.update_save(f'{model_dir}/model', end=end)

    return losses
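# Hedged usage sketch for train() with a toy classifier. The utils helpers it
# calls (get_device, save_model, ...) must exist in the surrounding project,
# and every value below is illustrative.
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset

toy = TensorDataset(torch.randn(256, 10), torch.randint(0, 2, (256,)))
model = nn.Sequential(nn.Linear(10, 16), nn.ReLU(), nn.Linear(16, 2))
losses = train(model, optim.Adam(model.parameters()),
               DataLoader(toy, batch_size=32), epochs=2,
               loss_criterion='CrossEntropyLoss', model_dir='out',
               plateau_limit=3, apply_nested_dropout=False, reconstruct=False)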
                           nitt_water.seldata,
                           lw=1.5)
nitt_water_legend = ax.legend(
    [l for l in nitt_water_line],
    [str(list(d.keys())[0]) for d in nitt_water_dict])
for l in nitt_water_line:
    l.set_visible(False)
nitt_water_legend.set_visible(False)
nitt_bed_line, = plt.plot(L / 1000 * mou - nitt_bed.data[:, 0],
                          nitt_bed.data[:, 1],
                          '.',
                          color='grey',
                          visible=False)
Qw_val = plt.text(0.05,
                  0.85,
                  "Qw = " + utils.format_number(Qw),
                  fontsize=16,
                  transform=ax.transAxes,
                  backgroundcolor='white')
Bw_val = plt.text(
    ((Xs[1] - Xs[0]) / 2 + Xs[0]) / 1000,
    45,
    "backwater from \n" + "RK " + str(int(L * mou / 1000 - Xs[0] / 1000)) +
    " to " + str(int(L * mou / 1000 - Xs[1] / 1000)),
    horizontalalignment="center",
    backgroundcolor="white")
Bw_brack, = plt.plot(np.array([Xs[0], Xs[0], Xs[1], Xs[1]]) / 1000,
                     np.array([36, 40, 40, 36]),
                     'k-',
                     lw=1.2)
    def transform(self, dom):
        # Currency
        for currencyNode in dom.xpath("//currency"):
            latexMode = utils.etree_in_context(currencyNode, "latex")
            symbolNode = currencyNode.find("symbol")
            if symbolNode is None:
                symbol = "R"
                symbolLocation = "front"
            else:
                symbol = symbolNode.text.strip()
                symbolLocation = symbolNode.attrib.get("location", "front")
            numberNode = currencyNode.find("number")
            if numberNode.text is None:
                numberNode.text = ""
            # Set default precision to 0 if number is an int, and to 2 if it is a float
            try:
                int(numberNode.text.strip())
                defaultPrecision = 0
            except ValueError:
                defaultPrecision = 2
            currencyPrecision = int(currencyNode.attrib.get("precision", defaultPrecision))
            numberNode.text = ("%%.%if" % currencyPrecision) % float(numberNode.text.strip())

            replacementNode = etree.Element("dummy")
            if symbolLocation == "front":
                if latexMode:
                    replacementNode.text = r"\text{" + symbol + " }"
                else:
                    replacementNode.text = symbol + u"\u00a0"
                replacementNode.append(numberNode)
            else:
                replacementNode.append(numberNode)
                if latexMode:
                    replacementNode.tail = r"\text{ " + symbol + "}"
                else:
                    replacementNode.tail = u"\u00a0" + symbol
            utils.etree_replace_with_node_list(currencyNode.getparent(), currencyNode, replacementNode)

        # Percentage
        for percentageNode in dom.xpath("//percentage"):
            latexMode = utils.etree_in_context(percentageNode, "latex")
            percentageNode.tag = "number"
            if percentageNode.tail is None:
                percentageNode.tail = ""
            if latexMode:
                percentageNode.tail = r"\%" + percentageNode.tail
            else:
                percentageNode.tail = "%" + percentageNode.tail

        # United numbers: ensure that units follow numbers
        for node in dom.xpath("//unit_number"):
            if (len(node) == 2) and (node[0].tag == "unit") and (node[1].tag == "number"):
                unitNode = node[0]
                numberNode = node[1]
                # remove both children, then re-append with the number first
                del node[0]
                del node[0]
                node.append(numberNode)
                node.append(unitNode)

        # Numbers
        for numberNode in dom.xpath("//number"):
            # Avoid shortcode exercise numbers
            if (numberNode.getparent().tag == "entry") and (numberNode.getparent().getparent().tag == "shortcodes"):
                continue
            latexMode = utils.etree_in_context(numberNode, "latex")
            if (len(numberNode) == 0) and ("e" in numberNode.text):
                # Number in exponential notation: convert to <coeff> and <exp>
                numberText = numberNode.text
                float(numberText)  # Check that it is really a float
                numberNode.text = None
                numberNode.append(etree.Element("coeff"))
                pos = numberText.find("e")
                numberNode[-1].text = numberText[:pos]
                numberNode.append(etree.Element("exp"))
                numberNode[-1].text = str(int(numberText[pos + 1 :]))

            if len(numberNode) == 0:
                # No children, means it's just a plain number
                coeffText = utils.format_number(numberNode.text.strip())
                try:
                    if latexMode:
                        dummyNode = etree.fromstring(r"<dummy>\text{" + coeffText + "}</dummy>")
                    else:
                        dummyNode = etree.fromstring("<dummy>" + coeffText + "</dummy>")
                except etree.XMLSyntaxError:
                    print(repr(coeffText))
                    raise
            else:
                # Scientific or exponential notation: parse out coefficient, base and exponent
                coeffNode = numberNode.find("coeff")
                expNode = numberNode.find("exp")
                baseNode = numberNode.find("base")
                if coeffNode is None:
                    # Exponential
                    if baseNode is None:
                        baseText = utils.format_number("10")
                    else:
                        baseText = utils.format_number(baseNode.text.strip())
                    assert expNode is not None, etree.tostring(numberNode)
                    expText = utils.format_number(expNode.text.strip())
                    if latexMode:
                        dummyNode = etree.fromstring(
                            r"<dummy>\text{" + baseText + r"}^{\text{" + expText + r"}}</dummy>"
                        )
                    else:
                        dummyNode = etree.fromstring("<dummy>" + baseText + "<sup>" + expText + "</sup></dummy>")
                else:
                    # Scientific notation or plain number (<coeff> only)
                    coeffText = utils.format_number(coeffNode.text.strip())
                    if expNode is None:
                        assert baseNode is None
                        try:
                            if latexMode:
                                dummyNode = etree.fromstring(r"<dummy>\text{" + coeffText + "}</dummy>")
                            else:
                                dummyNode = etree.fromstring("<dummy>" + coeffText + "</dummy>")
                        except etree.XMLSyntaxError:
                            print(repr(coeffText))
                            raise
                    else:
                        if baseNode is None:
Example #15
    def calc_single_stat(self, type, var, func, stat=['p']):
        dct = {}
        idx_col = type.name if type is not None else "VAR"
        dct[idx_col] = []
        n_col = 'n'
        column_names = [idx_col, n_col] + stat  # merge with stat list
        res = {'p': None, 's': None}

        if type == config.CalcByType.VIDEO:
            for v in config.Video:
                values = self.df[self.df.video == v.value][var].values
                dct[n_col] = len(values)
                res['s'], res['p'] = func(values)
                dct[idx_col].append(v.value)
                for st in stat:
                    if st not in dct:
                        dct[st] = []
                    dct[st].append(
                        utils.format_number(res[st], config.PRINT_PRECISION))

        if type == config.CalcByType.TOOL:
            for t in config.Tool:
                values = self.df[self.df.tool == t.value][var].values
                dct[n_col] = len(values)
                res['s'], res['p'] = func(values)
                dct[idx_col].append(t.value)
                for st in stat:
                    if st not in dct:
                        dct[st] = []
                    dct[st].append(
                        utils.format_number(res[st], config.PRINT_PRECISION))

        if type == config.CalcByType.VIDEO_TOOL:
            for t in config.Tool:
                dct[idx_col].append(t.value)
                for v in config.Video:
                    # self.df[self.df.tool == t.value][self.df.video == v.value][var].values
                    # works, but raises a UserWarning, so filter in two steps instead
                    df_tool_data = self.df[self.df.tool == t.value]
                    values = df_tool_data[df_tool_data.video ==
                                          v.value][var].values
                    res['s'], res['p'] = func(values)
                    if v.value not in dct:
                        dct[v.value] = []
                    add_vals = n_col + ": " + str(len(values))
                    add_vals += ", " + stat[0] + ": " + str(
                        utils.format_number(res[stat[0]],
                                            config.PRINT_PRECISION))
                    if len(stat) > 1:
                        add_vals = add_vals + ", " + stat[1] + ": " + str(
                            utils.format_number(res[stat[1]],
                                                config.PRINT_PRECISION))
                    dct[v.value].append(add_vals)
            # copy the video names (plain assignment would alias the original list)
            column_names = [e.value for e in config.Video]
            column_names.insert(0, type.name)

        # calc stat for var only
        if type is None:
            values = self.df[var]
            dct[n_col] = len(values)
            res['s'], res['p'] = func(values)
            dct[idx_col].append(var)
            for st in stat:
                if st not in dct:
                    dct[st] = []
                dct[st].append(
                    utils.format_number(res[st], config.PRINT_PRECISION))

        # results as df
        df = pd.DataFrame(dct, columns=column_names)
        df = df.set_index(idx_col)

        return df
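
# A minimal usage sketch (illustrative only): `VarStats` is a hypothetical
# wrapper class exposing `self.df` and `calc_single_stat`, and
# scipy.stats.shapiro merely stands in for the test function.
from scipy import stats

evaluator = VarStats(df)
per_video = evaluator.calc_single_stat(config.CalcByType.VIDEO, 'rating',
                                       stats.shapiro, stat=['s', 'p'])
overall = evaluator.calc_single_stat(None, 'rating', stats.shapiro)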
Example #16
import csv

import utils

# NOTE: `server` (db_select / db_insert / db_update) is the project's own
# database helper module; it is not shown in this example.

with open('../files/milho_usd.csv', 'r') as arquivo_csv:
    reader = csv.reader(arquivo_csv, delimiter=',')
    for idx, colunm in enumerate(reader):
        if idx > 1:

            date = colunm[0].replace(',', '')
            date = date.split(' ')
            date = utils.format_number(date[1], 2) + '.' + utils.format_number(
                str(utils.mounth_numeric.get(date[0])), 2) + '.' + date[2]
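            # e.g. colunm[0] == 'Mar 15, 2020' -> ['Mar', '15', '2020'] -> '15.03.2020'
            # (utils.mounth_numeric is assumed to map month names to numbers)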

            collection = 'historical_data_daily'
            keys = [
                'date', 'corn_usd', 'open_corn_usd', 'max_corn_usd',
                'min_corn_usd', 'vol_corn_usd', 'var_corn_usd'
            ]
            values = [
                utils.string_to_date(date),
                utils.string_to_float(colunm[1]),
                utils.string_to_float(colunm[2]),
                utils.string_to_float(colunm[3]),
                utils.string_to_float(colunm[4]),
                utils.string_to_float(colunm[5]),
                utils.string_to_float(colunm[6])
            ]

            if server.db_select(collection, [keys[0]], [values[0]]) is None:
                server.db_insert(collection, keys, values)
            else:
                # update every remaining field for this date
                for i in range(1, len(keys)):
                    server.db_update(collection, {keys[0]: values[0]},
                                     {keys[i]: values[i]})
Example #17
    def transform(self, dom):
        # Currency
        for currencyNode in dom.xpath('//currency'):
            latexMode = utils.etree_in_context(currencyNode, 'latex')
            symbolNode = currencyNode.find('symbol')
            if symbolNode is None:
                symbol = 'R'
                symbolLocation = 'front'
            else:
                symbol = symbolNode.text.strip()
                symbolLocation = symbolNode.attrib.get('location', 'front')
            numberNode = currencyNode.find('number')
            if numberNode.text is None:
                numberNode.text = ''
            # Set default precision to 0 if number is an int, and to 2 if it is a float
            try:
                int(numberNode.text.strip())
                defaultPrecision = 0
            except ValueError:
                defaultPrecision = 2
            currencyPrecision = int(currencyNode.attrib.get('precision', defaultPrecision))
            numberNode.text = ("%%.%if"%currencyPrecision)%float(numberNode.text.strip())
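            # e.g. <currency><number>5</number></currency> keeps precision 0 -> '5',
            # while <currency precision="2"><number>5</number></currency> -> '5.00'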

            replacementNode = etree.Element('dummy')
            if symbolLocation == 'front':
                if latexMode:
                    replacementNode.text = r'\text{' + symbol + ' }'
                else:
                    replacementNode.text = symbol + u'\u00a0'
                replacementNode.append(numberNode)
            else:
                replacementNode.append(numberNode)
                if latexMode:
                    replacementNode.tail = r'\text{ ' + symbol + '}'
                else:
                    replacementNode.tail = u'\u00a0' + symbol
            utils.etree_replace_with_node_list(currencyNode.getparent(), currencyNode, replacementNode)
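            # Net effect: the symbol is joined to the number with a non-breaking
            # space, e.g. 'R 5.00' in front, or '5.00 R' when location="back"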

        # Percentage
        for percentageNode in dom.xpath('//percentage'):
            latexMode = utils.etree_in_context(percentageNode, 'latex')
            percentageNode.tag = 'number'
            if percentageNode.tail is None:
                percentageNode.tail = ''
            if latexMode:
                percentageNode.tail = r'\%' + percentageNode.tail
            else:
                percentageNode.tail = '%' + percentageNode.tail
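            # e.g. <percentage>25</percentage> renders as 25% (or 25\% in a LaTeX context)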

        # United numbers: ensure that units follow numbers
        for node in dom.xpath('//unit_number'):
            if (len(node) == 2) and (node[0].tag == 'unit') and (node[1].tag == 'number'):
                unitNode = node[0]
                numberNode = node[1]
                del node[0]
                del node[0]
                node.append(numberNode)
                node.append(unitNode)
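                # e.g. <unit_number><unit>km</unit><number>5</number></unit_number>
                # is reordered so the number precedes the unit: 5 km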

        # Numbers
        for numberNode in dom.xpath('//number'):
            # Avoid shortcode exercise numbers
            if (numberNode.getparent().tag == 'entry') and (numberNode.getparent().getparent().tag == 'shortcodes'):
                continue
            latexMode = utils.etree_in_context(numberNode, 'latex')
            if (len(numberNode) == 0) and ('e' in numberNode.text):
                # Number in exponential notation: convert to <coeff> and <exp>
                numberText = numberNode.text
                float(numberText) # Check that it is really a float
                numberNode.text = None
                numberNode.append(etree.Element('coeff'))
                pos = numberText.find('e')
                numberNode[-1].text = numberText[:pos]
                numberNode.append(etree.Element('exp'))
                numberNode[-1].text = str(int(numberText[pos+1:]))
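                # e.g. text '1.5e3' becomes <coeff>1.5</coeff><exp>3</exp>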

            if len(numberNode) == 0:
                # No children, means it's just a plain number
                coeffText = utils.format_number(numberNode.text.strip())
                try:
                    if latexMode:
                        dummyNode = etree.fromstring(r'<dummy>\text{' + coeffText + '}</dummy>')
                    else:
                        dummyNode = etree.fromstring('<dummy>' + coeffText + '</dummy>')
                except etree.XMLSyntaxError:
                    print(repr(coeffText))
                    raise
            else:
                # Scientific or exponential notation: parse out coefficient, base and exponent
                coeffNode = numberNode.find('coeff')
                expNode = numberNode.find('exp')
                baseNode = numberNode.find('base')
                if coeffNode is None:
                    # Exponential
                    if baseNode is None:
                        baseText = utils.format_number('10')
                    else:
                        baseText = utils.format_number(baseNode.text.strip())
                    assert expNode is not None, etree.tostring(numberNode)
                    expText = utils.format_number(expNode.text.strip())
                    if latexMode:
                        dummyNode = etree.fromstring(r'<dummy>\text{' + baseText + r'}^{\text{' + expText + r'}}</dummy>')
                    else:
                        dummyNode = etree.fromstring('<dummy>' + baseText + '<sup>' + expText + '</sup></dummy>')
                else:
                    # Scientific notation or plain number (<coeff> only)
                    coeffText = utils.format_number(coeffNode.text.strip())
                    if expNode is None:
                        assert baseNode is None
                        try:
                            if latexMode:
                                dummyNode = etree.fromstring(r'<dummy>\text{' + coeffText + '}</dummy>')
                            else:
                                dummyNode = etree.fromstring('<dummy>' + coeffText + '</dummy>')
                        except etree.XMLSyntaxError:
                            print(repr(coeffText))
                            raise
                    else:
                        if baseNode is None:
Example #18
import calendar
import csv

import utils

with open('../files/dados_safra_milho_conab.csv', 'r') as arquivo_csv:
    reader = csv.reader(arquivo_csv, delimiter=',')
    year = 2013
    month = 1

    collection = 'historical_data_daily'

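    # calendar.monthcalendar(year, month) returns the month as weeks of day
    # numbers, zero-padded outside the month -- e.g. monthcalendar(2013, 1)
    # starts with [0, 1, 2, 3, 4, 5, 6] -- so the `day != 0` check below
    # skips the padding days.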
    for colunm in reader:
        if year < 2021:
            for week in calendar.monthcalendar(year, month):
                for day in week:
                    if day != 0:
                        date = utils.format_number(
                            day, 2) + '.' + utils.format_number(
                                month, 2) + '.' + str(year)
                        date = utils.string_to_date(date)
                        if server.db_select(collection, ['date'],
                                            [date]) is None:
                            server.db_insert(
                                collection, ['date', 'br_production'],
                                [date, utils.string_to_float(colunm[1])])
                        else:
                            server.db_update(collection, {"date": date}, {
                                'br_production':
                                utils.string_to_float(colunm[1])
                            })

Example #19
import time

import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import model_visualizations
import utils
# ConvAutoencoder, Logger and the utils / model_visualizations helpers are
# project-local modules; the Logger API used below (report_scalar,
# report_matplotlib_figure) matches a ClearML-style logger.


def train(model: ConvAutoencoder,
          optimizer: optim.Optimizer,
          dataloader: DataLoader,
          epochs: int,
          loss_criterion: str,
          model_dir: str,
          apply_nested_dropout: bool,
          plateau_limit: int,
          config: dict,
          lr_scheduler=None,
          logger: Logger = None):
    print(f'The model has {utils.get_num_parameters(model):,} parameters')
    loss_function = getattr(nn, loss_criterion)()
    batch_print = max(len(dataloader) // 5, 1)  # ~5 progress lines per epoch; guard against modulo-by-zero

    device = utils.get_device()
    model.to(device)
    model.train()

    losses = []
    best_loss = float('inf')
    plateau = 0
    train_time = 0
    for epoch in range(epochs):
        epoch_start = time.time()
        line = f'\tEpoch {epoch + 1}/{epochs}'
        if apply_nested_dropout and epoch > 0:
            line += f' ({model.get_converged_unit()}/{model.get_dropout_dim()} converged units)'
        print(line)

        batch_losses = []
        for i, (X, _) in enumerate(dataloader):
            optimizer.zero_grad()
            X = X.to(device)
            prediction = model(X)

            loss = loss_function(prediction, X)
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())
            if (i + 1) % batch_print == 0:
                batch_loss = utils.format_number(
                    np.average(batch_losses[-batch_print:]))
                print(f'Batch {i + 1} loss: {batch_loss}')

            if apply_nested_dropout and not model.has_converged():
                model(X)
                # if model.has_converged():
                #     break

        epoch_loss = utils.format_number(np.average(batch_losses))  # assumed to return a rounded float (compared with best_loss below)
        losses.append(epoch_loss)

        epoch_time = time.time() - epoch_start
        train_time += epoch_time

        print(f'\tEpoch loss {epoch_loss}')
        print(f'\tEpoch time {utils.format_time(epoch_time)}\n')

        model_save_dict = config.copy()
        model_save_dict['performance'] = dict(
            epoch=epoch,
            train_time=utils.format_time(train_time),
            losses=losses)
        has_improved = False
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            has_improved = True
            model_save_dict['performance']['best_loss'] = best_loss

        if apply_nested_dropout:
            model_save_dict['performance'][
                'converged_unit'] = model.get_converged_unit()
            model_save_dict['performance'][
                'dropout_dim'] = model.get_dropout_dim()

        # if lr_scheduler is not None and (model.apply_nested_dropout and model.has_converged()):
        if lr_scheduler is not None:
            if isinstance(lr_scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                lr_scheduler.step(epoch_loss)
            else:
                lr_scheduler.step()

        utils.save_model(model, optimizer, f'{model_dir}/model',
                         model_save_dict)
        if has_improved:
            utils.save_model(model, optimizer, f'{model_dir}/best_model',
                             model_save_dict)
            plateau = 0
        else:
            plateau += 1

        if logger is not None:
            iteration = epoch + 1
            logger.report_scalar('Model Loss',
                                 'Train Loss',
                                 epoch_loss,
                                 iteration=iteration)
            model_visualizations.plot_filters(model,
                                              output_shape=(8, 8),
                                              show=False)
            logger.report_matplotlib_figure('Model Filters',
                                            'Filters',
                                            figure=plt,
                                            iteration=iteration,
                                            report_image=True)
            plt.close()
            if lr_scheduler is not None:
                logger.report_scalar('Learning Rate',
                                     'Current',
                                     utils.get_learning_rate(optimizer),
                                     iteration=iteration)
                logger.report_scalar('Learning Rate',
                                     'Initial',
                                     optimizer.defaults['lr'],
                                     iteration=iteration)
            if apply_nested_dropout:
                logger.report_scalar('Nested Dropout',
                                     'Converged Unit',
                                     model.get_converged_unit(),
                                     iteration=iteration)
                logger.report_scalar('Nested Dropout',
                                     'Dropout Dimension',
                                     model.get_dropout_dim(),
                                     iteration=iteration)

        # if (plateau == plateau_limit) or (apply_nested_dropout is True and model.has_converged()):
        #     break

    if False:  # apply_nested_dropout is True and model.has_converged():
        end = 'Nested dropout has converged'
    elif plateau == plateau_limit:
        end = 'The model has plateaued'
    else:
        end = f'Reached max number of epochs ({epochs})'
    print(end)
    utils.update_save(f'{model_dir}/model', end=end)
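
# A hypothetical invocation sketch -- the dataset, model constructor and
# config layout below are placeholders, not taken from the original project.
import torch.optim as optim
from torch.utils.data import DataLoader

model = ConvAutoencoder()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
train(model, optimizer, dataloader,
      epochs=50, loss_criterion='MSELoss', model_dir='models/run1',
      apply_nested_dropout=False, plateau_limit=5,
      config={'lr': 1e-3, 'batch_size': 64})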