Exemple #1
0
 def postcompute_input1(self,
                        input0_package,
                        input1,
                        mask0=None,
                        mask1=None,
                        rel1_t=None):
     """Finish the scoring using the cached input0 terms and ``input1``.

     ``input0_package`` is unpacked as ``(ff_score0, biaff_hid0)``
     (presumably what ``precompute_input0`` returned -- confirm at the
     call site).  If ``rel1_t`` is given it is added to ``input1`` before
     any scoring.  The enabled components (feed-forward, biaffine, bias)
     are summed, each non-None mask adds ``mask_value * (1 - mask)``
     broadcast over a new last axis, and the result goes through
     ``self.drop_node``.  The ``use_ff2`` mode is reported via ``zfatal``.
     """
     ff_part0, biaff_part0 = input0_package
     query = input1 if rel1_t is None else input1 + rel1_t
     score = 0
     if self.use_ff:
         score = ff_part0 + self.A1(query)
     if self.use_ff2:
         zfatal("Not supported in this mode!")
     if self.use_biaffine:
         # replace the last axis of the cached term by [in1, out]
         lead_shape = BK.get_shape(biaff_part0)[:-1]
         weight = biaff_part0.view(
             lead_shape + [self.in_size1, self.out_size])
         # [*, 1, in1] * [*, in1, out] -> [*, 1, out] -> [*, out]
         biaff_score = BK.matmul(query.unsqueeze(-2), weight).squeeze(-2)
         score += biaff_score / self.biaffine_div
     if self.use_bias:
         score += self.B
     # masks are applied in order: mask0 first, then mask1
     for one_mask in (mask0, mask1):
         if one_mask is not None:
             score += self.mask_value * (1. - one_mask).unsqueeze(-1)
     return self.drop_node(score)
Exemple #2
0
 def valid_bigrams(scheme, tag_forms, bos):
     """Build the set of allowed (previous-tag, current-tag) pairs.

     Chunk types come from ``ChunksSeq.collect_types(tag_forms)``; ``bos``
     is the symbol used as the previous tag at sequence start.  Supported
     schemes are "BIO" and "BIOES"; anything else is reported via
     ``zfatal``.
     """
     chunk_types = ChunksSeq.collect_types(tag_forms)
     # transitions that do not involve any chunk type
     bigrams = {(bos, "O"), ("O", "O")}
     if scheme == "BIO":
         for t in chunk_types:
             b_tag, i_tag = "B-"+t, "I-"+t
             bigrams.update([
                 (bos, b_tag), ("O", b_tag),      # open a chunk
                 (b_tag, i_tag), (i_tag, i_tag),  # continue it
                 (b_tag, "O"), (i_tag, "O"),      # leave it
             ])
             # a new chunk of any type may start right after this one
             for t2 in chunk_types:
                 next_b = "B-"+t2
                 bigrams.update([(b_tag, next_b), (i_tag, next_b)])
     elif scheme == "BIOES":
         for t in chunk_types:
             b_tag, i_tag, e_tag, s_tag = "B-"+t, "I-"+t, "E-"+t, "S-"+t
             bigrams.update([
                 (bos, b_tag), ("O", b_tag),      # open a multi-token chunk
                 (bos, s_tag), ("O", s_tag),      # single-token chunk
                 (b_tag, i_tag), (i_tag, i_tag),  # continue
                 (b_tag, e_tag), (i_tag, e_tag),  # close
                 (s_tag, "O"), (e_tag, "O"),      # leave
             ])
             # only closed chunks (S- / E-) may be followed by a new chunk
             for t2 in chunk_types:
                 next_b, next_s = "B-"+t2, "S-"+t2
                 bigrams.update([
                     (s_tag, next_b), (e_tag, next_b),
                     (s_tag, next_s), (e_tag, next_s),
                 ])
     else:
         zfatal("Unknown tagging scheme")
     return bigrams
Exemple #3
0
 def __init__(self, model_type, args):
     """Set top-level defaults and create the conf for ``model_type``.

     Known model types are "simple", "m3", "m3a" and "m3r"; any other
     value is reported via ``zfatal``.  When ``args`` is not None the conf
     is updated from it and then validated.
     """
     # general settings
     self.model_type = model_type
     self.conf_output = ""
     self.log_file = Logger.MAGIC_CODE
     self.msp_seed = 9341
     # shared sub-confs
     self.niconf = NIConf()      # nn-init-conf
     self.dconf = DConf()        # data-conf
     # model-specific conf class, imported only for the chosen type
     mconf_cls = None
     if model_type == "simple":
         from ..models2.model import MySimpleIEModelConf
         mconf_cls = MySimpleIEModelConf
     elif model_type == "m3":
         from ..models3.model import M3IEModelConf
         mconf_cls = M3IEModelConf
     elif model_type == "m3a":
         from ..models3.modelA import M3AIEModelConf
         mconf_cls = M3AIEModelConf
     elif model_type == "m3r":
         from ..models3.modelR import M3RIEModelConf
         mconf_cls = M3RIEModelConf
     else:
         zfatal(f"Unknown model type: {model_type}, please provide correct type!")
     if mconf_cls is not None:
         self.mconf = mconf_cls()
     # apply the overrides, if any were given
     if args is None:
         return
     self.update_from_args(args)
     self.validate()
Exemple #4
0
def get_data_writer(file_or_fd, output_format):
    """Return the parse writer for ``output_format``.

    Accepts "conllu", "plain" (with a warning) or "json"; any other
    value is reported via ``zfatal``.
    """
    if output_format == "conllu":
        return ParserConlluWriter(file_or_fd)
    if output_format == "plain":
        zwarn("May be meaningless to write plain files for parses!")
        return ParserPlainWriter(file_or_fd)
    if output_format == "json":
        return ParserJsonWriter(file_or_fd)
    # no known format matched
    message = ("Unknown output_format %s, should select from {conllu,plain,json}"
               % output_format)
    zfatal(message)
Exemple #5
0
 def precompute_input0(self, input0):
     """Compute the terms that depend only on ``input0``.

     Returns ``(ff_score0, biaff_hid0)``: ``self.A0(input0)`` when the
     feed-forward part is enabled and ``BK.matmul(input0, self.W)`` when
     the biaffine part is enabled; a disabled part is ``None``.  The
     ``use_ff2`` mode is reported via ``zfatal``.
     """
     ff_part = self.A0(input0) if self.use_ff else None
     if self.use_ff2:
         # todo(+3)
         zfatal("Not supported in this mode!")
     biaff_part = BK.matmul(input0, self.W) if self.use_biaffine else None
     return ff_part, biaff_part
Exemple #6
0
 def _marginal(self, full_score_expr, maske_expr, lengths_arr):
     """Compute labeled marginals with the routine for the active algorithm.

     Uses ``nmarginal_unproj`` or ``nmarginal_proj`` depending on the
     ``alg_unproj`` / ``alg_proj`` flags; any other setting is reported
     via ``zfatal`` and yields ``None``.
     """
     if self.alg_unproj:
         marginal_fn = nmarginal_unproj
     elif self.alg_proj:
         marginal_fn = nmarginal_proj
     else:
         zfatal(
             "Unsupported marginal-calculation for the decoding algorithm of "
             + self.conf.iconf.dec_algorithm)
         return None
     return marginal_fn(full_score_expr,
                        maske_expr,
                        lengths_arr,
                        labeled=True)
Exemple #7
0
def get_data_reader(file_or_fd,
                    input_format,
                    use_la0,
                    noef_link0,
                    aux_repr_file=None,
                    max_evt_layers=100):
    """Create the document reader, optionally wrapped with aux data.

    Only the "json" input format is handled; anything else is reported
    via ``zfatal``.  A non-empty ``aux_repr_file`` wraps the reader in an
    ``AuxDataReader`` under the name "aux_repr".
    """
    if input_format != "json":
        zfatal("Unknown input_format %s" % input_format)
        reader = None
    else:
        reader = MyDocReader(file_or_fd,
                             use_la0,
                             noef_link0,
                             alter_carg_by_coref=True,
                             max_evt_layers=max_evt_layers)
    # nothing to attach: hand back the plain reader
    if aux_repr_file is None or len(aux_repr_file) == 0:
        return reader
    return AuxDataReader(reader, aux_repr_file, "aux_repr")
Exemple #8
0
def build_model(model_type, conf, vpack):
    """Instantiate the model for ``model_type`` from ``conf.mconf`` and ``vpack``.

    Known types are "simple", "m3", "m3a" and "m3r"; any other value is
    reported via ``zfatal`` and ``None`` is returned.
    """
    mconf = conf.mconf
    # pick the class first (imported only for the chosen type), build once below
    model_cls = None
    if model_type == "simple":
        from ..models2.model import MySimpleIEModel
        model_cls = MySimpleIEModel
    elif model_type == "m3":
        from ..models3.model import M3IEModel
        model_cls = M3IEModel
    elif model_type == "m3a":
        from ..models3.modelA import M3AIEModel
        model_cls = M3AIEModel
    elif model_type == "m3r":
        from ..models3.modelR import M3RIEModel
        model_cls = M3RIEModel
    else:
        zfatal(f"Unknown model type: {model_type}, please provide correct type!")
    if model_cls is None:
        return None
    return model_cls(mconf, vpack)
Exemple #9
0
 def __init__(self, partype, args):
     """Set top-level defaults and create the parser conf for ``partype``.

     Known parser types are "graph", "td", "ef", "g1", "g2", "s2" and
     "fp"; any other value is reported via ``zfatal``.  The conf is then
     updated from ``args`` and validated.
     """
     # general settings
     self.partype = partype
     self.conf_output = ""
     self.log_file = Logger.MAGIC_CODE
     self.msp_seed = 9341
     # shared sub-confs
     self.niconf = NIConf()  # nn-init-conf
     self.dconf = DConf()  # data-conf
     # parser-specific conf class, imported only for the chosen type
     pconf_cls = None
     if partype == "graph":
         from ..graph.parser import GraphParserConf
         pconf_cls = GraphParserConf
     elif partype == "td":
         from ..transition.topdown.parser import TdParserConf
         pconf_cls = TdParserConf
     elif partype == "ef":
         from ..ef.parser import EfParserConf
         pconf_cls = EfParserConf
     elif partype == "g1":
         from ..ef.parser import G1ParserConf
         pconf_cls = G1ParserConf
     elif partype == "g2":
         from ..ef.parser import G2ParserConf
         pconf_cls = G2ParserConf
     elif partype == "s2":
         from ..ef.parser import S2ParserConf
         pconf_cls = S2ParserConf
     elif partype == "fp":
         from ..zfp.fp import FpParserConf
         pconf_cls = FpParserConf
     else:
         zfatal(
             f"Unknown parser type: {partype}, please provide correct type with the option."
         )
     if pconf_cls is not None:
         self.pconf = pconf_cls()
     # apply the overrides and check the result
     self.update_from_args(args)
     self.validate()
Exemple #10
0
 def output_tags(self, scheme):
     """Render ``self.all_chunks`` as a list of tag strings for ``scheme``.

     Each chunk is a ``(start, end, type)`` triple.  A chunk with a falsy
     type contributes a single "O".  A typed chunk of width 1 gets the
     single-token tag; otherwise it gets a begin tag, ``width - 2`` "I-"
     tags and an end tag.  The final length is checked against
     ``self.length`` with ``zcheck``.  Schemes other than "BIO" and
     "BIOES" are reported via ``zfatal``.
     """
     if scheme == "BIO":
         begin_tag, end_tag, single_tag = "B", "I", "B"
     elif scheme == "BIOES":
         begin_tag, end_tag, single_tag = "B", "E", "S"
     else:
         zfatal("Unknown tagging scheme")
     tags = []
     for start, end, chunk_type in self.all_chunks:
         if not chunk_type:
             tags.append("O")
             continue
         width = end - start
         if width == 1:
             tags.append(single_tag+"-"+chunk_type)
             continue
         tags.append(begin_tag+"-"+chunk_type)
         # interior tokens (none when the chunk has only two tokens)
         tags.extend(["I-"+chunk_type] * (width - 2))
         tags.append(end_tag+"-"+chunk_type)
     zcheck(len(tags)==self.length, "Err length.")
     return tags
Exemple #11
0
 def _decode(self, full_score_expr, maske_expr, lengths_arr):
     """Run the labeled decoder that matches the active algorithm flag.

     The flags are checked in the order ``alg_unproj``, ``alg_proj``,
     ``alg_greedy``; if none is set the problem is reported via ``zfatal``
     and ``None`` is returned.
     """
     if self.alg_unproj:
         decoder = nmst_unproj
     elif self.alg_proj:
         decoder = nmst_proj
     elif self.alg_greedy:
         decoder = nmst_greedy
     else:
         zfatal("Unknown decoding algorithm " +
                self.conf.iconf.dec_algorithm)
         return None
     # all decoders share the same call signature
     return decoder(full_score_expr,
                    maske_expr,
                    lengths_arr,
                    labeled=True,
                    ret_arr=True)
Exemple #12
0
def build_model(partype, conf, vpack):
    """Instantiate the parser for ``partype`` from ``conf.pconf`` and ``vpack``.

    Known parser types are "graph", "td" (with a deprecation warning),
    "ef", "g1", "g2", "s2" and "fp".  Each parser class is imported only
    when its type is selected.  Any other value is reported via
    ``zfatal`` (the message names the offending type) and ``None`` is
    returned.
    """
    pconf = conf.pconf
    parser = None
    if partype == "graph":
        # original first-order graph with various output constraints
        from ..graph.parser import GraphParser
        parser = GraphParser(pconf, vpack)
    elif partype == "td":
        # re-implementation of the top-down stack-pointer parser
        zwarn(
            "Warning: Current implementation of td-mode is deprecated and outdated."
        )
        from ..transition.topdown.parser import TdParser
        parser = TdParser(pconf, vpack)
    elif partype == "ef":
        # generalized easy-first parser
        from ..ef.parser import EfParser
        parser = EfParser(pconf, vpack)
    elif partype == "g1":
        # first-order graph parser
        from ..ef.parser import G1Parser
        parser = G1Parser(pconf, vpack)
    elif partype == "g2":
        # higher-order graph parser
        from ..ef.parser import G2Parser
        parser = G2Parser(pconf, vpack)
    elif partype == "s2":
        # two-stage parser
        from ..ef.parser import S2Parser
        parser = S2Parser(pconf, vpack)
    elif partype == "fp":
        # the finale parser
        from ..zfp.fp import FpParser
        parser = FpParser(pconf, vpack)
    else:
        # the previous message left its "%s" placeholder unfilled
        zfatal(f"Unknown parser type: {partype}")
    return parser
Exemple #13
0
def get_data_reader(file_or_fd,
                    input_format,
                    aug_code,
                    use_la0,
                    aux_repr_file=None,
                    aux_score_file=None,
                    cut=None):
    """Create the parse reader, optionally wrapped with aux data.

    ``cut`` is ``None`` or an empty value for "no cut" (passed on as -1),
    otherwise it is converted with ``int``.  ``input_format`` selects the
    "conllu", "plain" or "json" reader; anything else is reported via
    ``zfatal``.  Each non-empty aux file wraps the reader in an
    ``AuxDataReader`` ("aux_repr" first, then "aux_score").
    """
    if cut is None or len(cut) == 0:
        cut = -1
    else:
        cut = int(cut)
    if input_format == "conllu":
        reader = ParseConlluReader(file_or_fd, aug_code, use_la0=use_la0, cut=cut)
    elif input_format == "plain":
        reader = ParseTextReader(file_or_fd, aug_code, cut=cut)
    elif input_format == "json":
        reader = ParseJsonReader(file_or_fd, aug_code, use_la0=use_la0, cut=cut)
    else:
        zfatal(
            "Unknown input_format %s, should select from {conllu,plain,json}" %
            input_format)
        reader = None
    # stack the optional auxiliary readers in a fixed order
    aux_sources = ((aux_repr_file, "aux_repr"), (aux_score_file, "aux_score"))
    for aux_file, aux_name in aux_sources:
        if aux_file is not None and len(aux_file) > 0:
            reader = AuxDataReader(reader, aux_file, aux_name)
    return reader
Exemple #14
0
 def refresh(self, rop=None):
     """Unsupported entry point: always reports an error via ``zfatal``.

     ``rop`` is accepted but not used; the message directs callers to
     ``special_refresh``.
     """
     zfatal("Should call special_refresh instead!")
Exemple #15
0
def get_data_writer(file_or_fd, output_format):
    """Return the document writer for ``output_format``.

    Only "json" is handled; any other value is reported via ``zfatal``.
    """
    if output_format != "json":
        zfatal("Unknown output_format %s" % output_format)
        return None
    return MyDocWriter(file_or_fd)
Exemple #16
0
 def _restart(self):
     """(Re)create the iterator over ``self.src_``.

     Allowed when no iterator exists yet or when the stream is
     restartable; otherwise the attempt is reported via ``zfatal``.
     """
     can_restart = self.it_ is None or self.restartable
     if not can_restart:
         zfatal("Cannot restart a non-repeatable stream")
         return
     self.it_ = iter(self.src_)
Exemple #17
0
 def _restart(self):
     """Mark the stream as started; a repeated call is reported via ``zfatal``."""
     # the flag is already set on any call after the first one
     if self.started:
         zfatal("Cannot restart this fd stream")
     self.started = True
Exemple #18
0
 def __init__(self, pc: BK.ParamCollection, econf: EncConf):
     """Build the encoder sub-layers listed in ``econf.enc_ordering``.

     Each name in the ordering ("rnn", "cnn", "att", "att2") adds one
     sub-node, but only if its layer count in ``econf`` is positive; an
     unknown name is reported via ``zfatal``.  ``last_dim`` starts at
     ``econf._input_dim`` and, after every name, is refreshed from the
     output dims of the most recently added layer; its final value
     becomes ``self.output_dim``.
     """
     super().__init__(pc, None, None)
     self.conf = econf
     #
     self.input_dim = econf._input_dim
     self.enc_hidden = econf.enc_hidden
     # add the sublayers
     self.layers = []
     # todo(0): allowing repeated names
     # running input size for the next sub-layer
     last_dim = self.input_dim
     for name in econf.enc_ordering:
         if name == "rnn":
             if econf.enc_rnn_layer > 0:
                 rnn_bidirect, rnn_sep_bidirection = econf.enc_rnn_bidirect, econf.enc_rnn_sep_bidirection
                 # a bidirectional rnn is given half of enc_hidden as its size
                 rnn_enc_size = self.enc_hidden // 2 if rnn_bidirect else self.enc_hidden
                 rnn_layer = self.add_sub_node(
                     "rnn",
                     RnnLayerBatchFirstWrapper(
                         pc,
                         RnnLayer(pc,
                                  last_dim,
                                  rnn_enc_size,
                                  econf.enc_rnn_layer,
                                  node_type=econf.enc_rnn_type,
                                  bidirection=rnn_bidirect,
                                  sep_bidirection=rnn_sep_bidirection)))
                 self.layers.append(rnn_layer)
         # todo(+2): different i/o sizes for cnn and att?
         elif name == "cnn":
             if econf.enc_cnn_layer > 0:
                 # enc_hidden is split evenly (integer division) over the windows
                 per_cnn_size = self.enc_hidden // len(
                     econf.enc_cnn_windows)
                 # NOTE(review): every stacked CnnLayer is built with the same
                 # last_dim as input size -- confirm this is intended when
                 # enc_cnn_layer > 1
                 cnn_layer = self.add_sub_node(
                     "cnn",
                     Sequential(pc, [
                         CnnLayer(pc,
                                  last_dim,
                                  per_cnn_size,
                                  econf.enc_cnn_windows,
                                  act="elu")
                         for _ in range(econf.enc_cnn_layer)
                     ]))
                 self.layers.append(cnn_layer)
         elif name == "att":
             if econf.enc_att_layer > 0:
                 # the attention encoder is built with last_dim, which must equal enc_hidden
                 zcheck(last_dim == self.enc_hidden,
                        "I/O should have same dim for Att-Enc")
                 att_layer = self.add_sub_node(
                     "att",
                     TransformerEncoder(
                         pc,
                         econf.enc_att_layer,
                         last_dim,
                         econf.enc_att_ff,
                         econf.enc_att_add_wrapper,
                         econf.enc_att_conf,
                         final_act=econf.enc_att_final_act,
                         fixed_range_vals=econf.enc_att_fixed_ranges))
                 self.layers.append(att_layer)
         elif name == "att2":
             if econf.enc_att2_layer > 0:
                 # same dimension requirement as for "att"
                 zcheck(last_dim == self.enc_hidden,
                        "I/O should have same dim for Att-Enc")
                 att2_layer = self.add_sub_node(
                     "att2",
                     Transformer2Encoder(
                         pc,
                         econf.enc_att2_layer,
                         last_dim,
                         econf.enc_att2_conf,
                         short_range=econf.enc_att2_short_range,
                         long_ranges=econf.enc_att2_long_ranges))
                 self.layers.append(att2_layer)
         else:
             zfatal("Unknown encoder name: " + name)
         # refresh the running size from the latest layer (if any exists yet)
         if len(self.layers) > 0:
             last_dim = self.layers[-1].get_output_dims()[-1]
     self.output_dim = last_dim
     #
     if econf.no_final_dropout:
         self.disable_final_dropout()