def raw_to_proto(self, raw):
    """Encode one raw value as a single-element int64 ``Feature``.

    Args:
        raw: The raw value. When ``self.vocab`` is ``None`` it is converted
            with ``int()``; otherwise it is used as a key into ``self.vocab``.

    Returns:
        A ``feature_pb2.Feature`` whose ``int64_list`` holds exactly one id.
    """
    # No vocab: the raw value is itself the id. Otherwise look the id up
    # (a missing key propagates whatever error ``self.vocab[...]`` raises).
    single_id = int(raw) if self.vocab is None else self.vocab[raw]
    int64_values = feature_pb2.Int64List(value=[single_id])
    return feature_pb2.Feature(int64_list=int64_values)
def raw_to_proto(self, raw):
    """Tokenize ``raw`` and encode the resulting ids as an int64 ``Feature``.

    Args:
        raw: The raw input handed unchanged to ``self.tokenizer``.

    Returns:
        A ``feature_pb2.Feature`` whose ``int64_list`` holds one id per item
        produced by the tokenizer, in order.
    """
    ids = []
    for token in self.tokenizer(raw):
        if isinstance(token, int):
            # The tokenizer already produced an id; keep it as is.
            ids.append(token)
        else:
            # Map the token through the vocab, falling back to ``unk_id``.
            ids.append(self.vocab.get(token, self.unk_id))
    int64_values = feature_pb2.Int64List(value=ids)
    return feature_pb2.Feature(int64_list=int64_values)
def build_example(slots):
    """Pack two parallel int64 sequences into a ``SequenceExample``.

    Args:
        slots: A two-item iterable ``(txt, seginfo)``. Each item is an
            iterable whose elements are each passed as ``value`` to an
            ``Int64List`` (i.e. one ``Feature`` per element).

    Returns:
        An ``example_pb2.SequenceExample`` whose ``feature_lists`` contain
        ``'txt'`` (built from ``txt``) and ``'segs'`` (built from
        ``seginfo``).

    Raises:
        AssertionError: If the two feature lists differ in length.
    """

    def _to_feature_list(rows):
        # One int64 Feature per row, wrapped in a FeatureList.
        return feature_pb2.FeatureList(feature=[
            feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=row))
            for row in rows
        ])

    txt, seginfo = slots
    txt_fe_list = _to_feature_list(txt)
    segsinfo_fe_list = _to_feature_list(seginfo)

    n_txt = len(txt_fe_list.feature)
    n_segs = len(segsinfo_fe_list.feature)
    if n_txt != n_segs:
        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``; AssertionError is kept so that any existing
        # handler for the old assert keeps matching.
        raise AssertionError(
            'txt[%d] and seginfo[%d] size not match' % (n_txt, n_segs))

    features = {
        'txt': txt_fe_list,
        'segs': segsinfo_fe_list,
    }
    return example_pb2.SequenceExample(
        feature_lists=feature_pb2.FeatureLists(feature_list=features))
def raw_to_proto(self, raw):
    """Return an empty ``Feature``; ``raw`` is accepted but not used."""
    empty_feature = feature_pb2.Feature()
    return empty_feature
def raw_to_proto(self, raw):
    """Parse space-separated integers from bytes into an int64 ``Feature``.

    Args:
        raw: A bytes value such as ``b'1 2 3'``. It is split on the single
            space byte only, so an empty field (from doubled or trailing
            spaces, or empty input) makes ``int()`` raise ``ValueError``.

    Returns:
        A ``feature_pb2.Feature`` whose ``int64_list`` holds the parsed
        integers in order.
    """
    fields = raw.split(b' ')
    int64_values = feature_pb2.Int64List(value=list(map(int, fields)))
    return feature_pb2.Feature(int64_list=int64_values)
def raw_to_proto(self, raw):
    """Wrap ``raw`` in a single-element ``bytes_list`` ``Feature``.

    Args:
        raw: The value to store; it is passed through unmodified as the only
            entry of the ``BytesList``.

    Returns:
        A ``feature_pb2.Feature`` whose ``bytes_list`` holds ``raw``.
    """
    # Qualify BytesList through feature_pb2, the same way Int64List is
    # referenced elsewhere in this file, so this does not depend on a
    # separate bare ``BytesList`` import being present.
    bytes_values = feature_pb2.BytesList(value=[raw])
    return feature_pb2.Feature(bytes_list=bytes_values)
def raw_to_proto(self, raw):
    """Produce a default-constructed ``Feature``, ignoring ``raw``."""
    blank = feature_pb2.Feature()
    return blank
def raw_to_proto(self, raw):
    """Tokenize ``raw`` and map every token to an id in an int64 ``Feature``.

    Args:
        raw: The raw input handed unchanged to ``self.tokenizer``.

    Returns:
        A ``feature_pb2.Feature`` whose ``int64_list`` holds, for each token,
        ``self.vocab.get(token, self.unk_id)``.
    """
    ids = []
    for token in self.tokenizer(raw):
        # Tokens missing from the vocab fall back to ``unk_id``.
        ids.append(self.vocab.get(token, self.unk_id))
    int64_values = feature_pb2.Int64List(value=ids)
    return feature_pb2.Feature(int64_list=int64_values)