#!/usr/bin/env python # coding=utf-8 from multiprocessing import Process import sys import datetime import ujson as json import time import random import libpyfeature_extract import tensorflow as tf fe = libpyfeature_extract.PyFeatureExtract('feature_index') def process(in_path): out_path = in_path.replace('train_data', 'nt_ads_train_data_cvr') print(in_path, out_path) filename = out_path options_zlib = tf.python_io.TFRecordOptions( tf.python_io.TFRecordCompressionType.GZIP) writer = tf.python_io.TFRecordWriter(filename, options=options_zlib) cnt = 0 with open(in_path) as f: for line in f: sample = json.loads(line) if sample['label']['click'] == 0: continue cnt += 1 s = fe.extract_tf_example(line) writer.write(s) if cnt % 1000 == 0:
#!/usr/bin/env python
# coding=utf-8
"""Convert a line-oriented training file into a GZIP-compressed TFRecord file."""
from multiprocessing import Process
import sys
import datetime
import ujson as json
import time
import random
import libpyfeature_extract
import tensorflow as tf

# Module-level extractor, constructed once at import time and reused for every
# line handled by process().
# NOTE(review): the meaning of the empty-string constructor argument is not
# visible here -- confirm against libpyfeature_extract.
fe = libpyfeature_extract.PyFeatureExtract('')


def process(in_path):
    """Write one TFRecord entry per line of ``in_path``.

    The output path is ``in_path`` with every occurrence of ``'train_data'``
    replaced by ``'ads_train_data'``. Each input line is passed to
    ``fe.extract_tf_example`` and the result is written to a GZIP-compressed
    TFRecord file. A progress message is printed every 1000 lines.

    Args:
        in_path: Path of the input text file; must contain ``'train_data'``.

    Raises:
        ValueError: If ``in_path`` does not contain ``'train_data'``. The
            derived output path would then equal the input path, and opening
            the writer would truncate the input before it is read.
    """
    out_path = in_path.replace('train_data', 'ads_train_data')
    if out_path == in_path:
        raise ValueError(
            "in_path %r does not contain 'train_data'; refusing to "
            "overwrite the input file" % (in_path,))
    print(in_path, out_path)

    options_zlib = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.GZIP)
    writer = tf.python_io.TFRecordWriter(out_path, options=options_zlib)
    cnt = 0
    try:
        with open(in_path) as f:
            for line in f:
                cnt += 1
                # presumably a serialized tf.Example -- TODO confirm against
                # libpyfeature_extract
                s = fe.extract_tf_example(line)
                writer.write(s)
                if cnt % 1000 == 0:
                    print('process %s' % cnt)
    finally:
        # Always close the writer, even if reading or extraction fails, so
        # the file handle is released and buffered records are flushed.
        writer.close()
def callPartitionSetup(self):
    """Import the feature-extraction module and store an extractor on ``self``.

    Ensures the relative directory ``'lib'`` is on ``sys.path``, imports
    ``libpyfeature_extract``, and assigns a new ``PyFeatureExtract('')``
    instance to ``self.fe_lib``.
    """
    # Only add the entry once: an unconditional append would grow sys.path
    # with a duplicate 'lib' entry on every call.
    if 'lib' not in sys.path:
        sys.path.append('lib')
    # Imported after the path adjustment; presumably the module is shipped
    # inside 'lib' -- TODO confirm.
    import libpyfeature_extract
    self.fe_lib = libpyfeature_extract.PyFeatureExtract('')