Python text2ids Exemples

Langage de programmation: Python

Espace de nommage/Pack: deepiu.image_caption.text2ids

Méthode/Fonction: text2ids

Exemples sur hotexamples.com : 2

Python text2ids - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de deepiu.image_caption.text2ids.text2ids extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Exemple #1

0

Afficher le fichier

def deal_file(file, thread_index):
    r"""Convert one tab-separated feature file into a TFRecord file.

    Each input line is split on tabs into an image name (first field),
    ``IMAGE_FEATURE_LEN`` float features, and any number of text fields.
    For every text field only the part before the first ``\x01`` is used.
    Each non-blank text that maps to at least one id is written as one
    ``tf.train.Example`` with the keys ``image_name``, ``image_feature``,
    ``text`` and ``text_str``.

    Args:
        file: Path of the input file to read.
        thread_index: Index of the calling worker. It is appended to the
            output file name when ``FLAGS.threads > 1`` and is used as the
            key into the per-worker module-level containers.

    Side effects:
        * Writes records to ``FLAGS.output_directory``.
        * Appends to ``gtexts`` / ``gtext_strs`` when ``FLAGS.np_save`` is set
          and publishes them through ``texts_dict`` / ``text_strs_dict``.
        * Updates the shared ``counter``, ``max_num_words`` and ``sum_words``
          values under their respective locks.
    """
    # Only `.value` is mutated below, so this declaration is informational;
    # it is kept at the top of the function where `global` belongs.
    global counter, max_num_words, sum_words

    if FLAGS.threads > 1:
        out_file = '{}/{}_{}'.format(
            FLAGS.output_directory, FLAGS.name, thread_index)
    else:
        out_file = '{}/{}'.format(FLAGS.output_directory, FLAGS.name)
    print('out_file:', out_file)

    img_end = IMAGE_FEATURE_LEN + 1

    # Open the input in a `with` block so the handle is closed even when a
    # malformed line raises part-way through the file.
    with melt.tfrecords.Writer(out_file) as writer, open(file) as lines:
        num = 0
        for line in lines:
            if num % 1000 == 0:
                print(num)

            fields = line.rstrip().split('\t')
            img = fields[0]
            img_feature = [float(x) for x in fields[1:img_end]]
            texts = [x.split('\x01')[0] for x in fields[img_end:]]

            for text in texts:
                if text.strip() == '':
                    continue

                # TODO: text -> ids should move out so online code can share
                # it for evaluation or use it for a feed dict.
                word_ids = text2ids.text2ids(
                    text,
                    seg_method=FLAGS.seg_method,
                    feed_single=FLAGS.feed_single,
                    allow_all_zero=True,
                    pad=False)
                # Measured before truncation/padding, so the statistics below
                # describe the raw text length rather than the stored length.
                word_ids_length = len(word_ids)

                if num % 1000 == 0:
                    print(text, word_ids, text2ids.ids2text(word_ids),
                          file=sys.stderr)

                if word_ids_length == 0:
                    continue

                word_ids = word_ids[:TEXT_MAX_WORDS]
                if FLAGS.pad:
                    word_ids = gezi.pad(word_ids, TEXT_MAX_WORDS, 0)

                if FLAGS.np_save:
                    gtexts[thread_index].append(word_ids)
                    gtext_strs[thread_index].append(text)

                # TODO: add position info? weight info? click-count info?
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'image_name': melt.bytes_feature(img),
                        'image_feature': melt.float_feature(img_feature),
                        'text': melt.int_feature(word_ids),
                        'text_str': melt.bytes_feature(text),
                    }))
                writer.write(example)

                with counter.get_lock():
                    counter.value += 1

                # Cheap unlocked pre-check, then re-check while holding the
                # lock: another worker may have stored a larger maximum
                # between the first comparison and acquiring the lock, and
                # must not be overwritten with a smaller value.
                if word_ids_length > max_num_words.value:
                    with max_num_words.get_lock():
                        if word_ids_length > max_num_words.value:
                            max_num_words.value = word_ids_length

                with sum_words.get_lock():
                    sum_words.value += word_ids_length

            # NOTE(review): the source was collapsed onto one line; `num` is
            # assumed to count input lines (matching the progress print at the
            # top of this loop), not individual texts - confirm.
            num += 1

    texts_dict[thread_index] = gtexts[thread_index]
    text_strs_dict[thread_index] = gtext_strs[thread_index]

Exemple #2

0

Afficher le fichier

Fichier : gen-records-streaming.py Projet : Hibbert-pku/hasky

writer = melt.tfrecords.Writer(outfile) num = 0 count = 0 for line in sys.stdin: if num % 1000 == 0: print(num, file=sys.stderr) num += 1 l = line.rstrip().split('\t') img = l[0] img_end = IMAGE_FEATURE_LEN + 1 img_feature = [float(x) for x in l[1: img_end]] texts = [x.split('\x01')[0] for x in l[img_end:]] for text in texts: word_ids = text2ids.text2ids(text, seg_method=FLAGS.seg_method, feed_single=FLAGS.feed_single, allow_all_zero=True, pad=False) word_ids_length = len(word_ids) if num % 1000 == 0: #print(libgezi.gbk2utf8('\t'.join(words)), file=sys.stderr) #print('\t'.join(words), file=sys.stderr) print(word_ids, file=sys.stderr) if len(word_ids) == 0: continue word_ids = word_ids[:TEXT_MAX_WORDS] if FLAGS.pad: word_ids = gezi.pad(word_ids, TEXT_MAX_WORDS, 0) if writer is not None: example = tf.train.Example(features=tf.train.Features(feature={ 'image_name': melt.bytes_feature(img), 'image_feature': melt.float_feature(img_feature),