Example #1
# Misc Parameters
def str2bool(v):
    # argparse's type=bool treats any non-empty string (even "False") as True,
    # so boolean flags need an explicit parser.
    return str(v).lower() in ("yes", "true", "t", "1")

parser.add_argument("--allow_soft_placement", default=True, type=str2bool, help="Allow soft device placement")
parser.add_argument("--log_device_placement", default=False, type=str2bool, help="Log placement of ops on devices")

FLAGS = parser.parse_args()
# FLAGS = tf.flags.FLAGS
# FLAGS._parse_flags()
# FLAGS(sys.argv)
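# With str2bool in place, the flags can be toggled from the command line,
# e.g. (the script name here is illustrative):
#   python train.py --allow_soft_placement false --log_device_placement true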

print("\nParameters:")
print(FLAGS)


# Load data
print("Loading data...")
trainset = Dataset('../../data/'+FLAGS.dataset+'/train.ss')
devset = Dataset('../../data/'+FLAGS.dataset+'/dev.ss')
testset = Dataset('../../data/'+FLAGS.dataset+'/test.ss')

alldata = np.concatenate([trainset.t_docs, devset.t_docs, testset.t_docs], axis=0)
embeddingpath = '../../data/'+FLAGS.dataset+'/embedding.txt'
embeddingfile, wordsdict = data_helpers.load_embedding(embeddingpath, alldata, FLAGS.embedding_dim)
del alldata
print("Loading data finished...")

usrdict, prddict = trainset.get_usr_prd_dict()
trainbatches = trainset.batch_iter(usrdict, prddict, wordsdict, FLAGS.n_class, FLAGS.batch_size,
                                 FLAGS.num_epochs, FLAGS.max_sen_len, FLAGS.max_doc_len)
devset.genBatch(usrdict, prddict, wordsdict, FLAGS.batch_size,
                  FLAGS.max_sen_len, FLAGS.max_doc_len, FLAGS.n_class)
testset.genBatch(usrdict, prddict, wordsdict, FLAGS.batch_size,
                        FLAGS.max_sen_len, FLAGS.max_doc_len, FLAGS.n_class)

Example #2
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")
tf.flags.DEFINE_string("model_type", "classification",
                       "model type classification or regression")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data
print("Loading data...")

stime = time.time()
trainset = Dataset('data/train.txt')
etime = time.time()
print("================= load trainset ===============", etime - stime)
devset = Dataset('data/dev.txt')
stime = time.time()
print("================= load devset ===============", stime - etime)
testset = Dataset('data/test.txt')
etime = time.time()
print("================= load testset ===============", etime - stime)

# alldata = np.concatenate([trainset.t_docs, devset.t_docs, testset.t_docs], axis=0)

with open('data/wordlist.txt') as fs:
    alldata = [line.strip() for line in fs]
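The stime/etime juggling in the timing code above is easy to get wrong; a small context manager is one common alternative (a sketch, not part of the original example):

from contextlib import contextmanager
import time

@contextmanager
def timed(label):
    # Measure and report the wall-clock time of the enclosed block.
    start = time.time()
    yield
    print("================= %s ===============" % label, time.time() - start)

# Usage: with timed("load trainset"): trainset = Dataset('data/train.txt')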
Example #3
tf.flags.DEFINE_string("checkpoint_dir", "", "Checkpoint directory from training run")

# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data
checkpoint_file = tf.train.latest_checkpoint("../checkpoints/"+FLAGS.dataset+"/"+FLAGS.checkpoint_dir+"/")
testset = Dataset('../../data/'+FLAGS.dataset+'/test.ss')
with open("../checkpoints/"+FLAGS.dataset+"/"+FLAGS.checkpoint_dir+"/wordsdict.txt", 'rb') as f:
    wordsdict = pickle.load(f)
with open("../checkpoints/"+FLAGS.dataset+"/"+FLAGS.checkpoint_dir+"/usrdict.txt", 'rb') as f:
    usrdict = pickle.load(f)
with open("../checkpoints/"+FLAGS.dataset+"/"+FLAGS.checkpoint_dir+"/prddict.txt", 'rb') as f:
    prddict = pickle.load(f)

testset.genBatch(usrdict, prddict, wordsdict, FLAGS.batch_size,
                  FLAGS.max_sen_len, FLAGS.max_doc_len, FLAGS.n_class)

graph = tf.Graph()
with graph.as_default():
    session_config = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
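    # The example is truncated here; in the usual TF 1.x evaluation pattern
    # (a sketch, not part of the original snippet) the graph and weights are
    # restored from the checkpoint next:
    sess = tf.Session(config=session_config)
    with sess.as_default():
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)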
Example #4
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))

# Load data
checkpoint_dir = os.path.abspath("checkpoints/" + str(FLAGS.max_sen_len) + "/")
checkpoint_prefix = os.path.join(checkpoint_dir, "model")
# checkpoint_file = tf.train.import_meta_graph(checkpoint_prefix+".meta")
print("====", checkpoint_prefix)

stime = time.time()
testset = Dataset('data/final_test.txt', True)
etime = time.time()
print("================= load testset ===============", etime - stime)
with open('data/wordlist.txt') as fs:
    alldata = [line.strip() for line in fs]
estime = time.time()
print("================= load wordlist ===============", estime - etime)
embeddingpath = 'data/embeding1'
embeddingfile, wordsdict = data_helpers.load_embedding(embeddingpath, alldata,
                                                       FLAGS.embedding_dim)

print(type(embeddingfile))
del alldata
stime = time.time()