コード例 #1
0
def read_records():
    """Build batched input ops in a fresh graph and pass a step callback to ``tf_flow``.

    ``FLAGS.sparse`` picks the reader/decoder pair: ``melt.read_sparse.inputs``
    with ``decode_examples`` when truthy, otherwise ``melt.read.inputs`` with
    ``decode_example``.  The input path is read from ``sys.argv[1]``.
    """
    # Original author's notes (not verified here): both sparse settings seem
    # to work and sparse=1 looked faster; sparse=0 may be faster for large
    # examples where only a few features are decoded, because decoding happens
    # before shuffling so less data is shuffled.  The sparse=0 flow can also
    # handle both sparse and dense input.
    reader, decoder = (
        (melt.read_sparse.inputs, decode_examples)
        if FLAGS.sparse
        else (melt.read.inputs, decode_example)
    )

    # Build the ops in a new graph that is installed as the default graph.
    with tf.Graph().as_default():
        example_id, features, labels = reader(
            sys.argv[1],
            decode=decoder,
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.num_epochs,
            num_preprocess_threads=FLAGS.num_preprocess_threads,
            batch_join=FLAGS.batch_join,
            shuffle=FLAGS.shuffle,
        )

        def run_step(sess, step):
            # A new list is built per call, exactly as the original lambda did.
            return read_once(sess, step, [example_id, features, labels])

        tf_flow(run_step)
コード例 #2
0
def read_records():
    """Build bucketed, dynamically padded input ops and pass them to ``tf_flow``.

    Uses ``melt.decode_then_shuffle.inputs`` with ``decode_example`` on the
    path given in ``sys.argv[1]``.  Bucket boundaries come from the
    comma-separated ``FLAGS.buckets`` string; empty fields are skipped.
    """
    # Original author's notes (not verified here): only decode_then_shuffle is
    # usable because decoding relies on tf.parse_single_sequence_example, which
    # rules out melt.shuffle_then_decode.inputs.  TODO from the author: verify
    # that sparse input works; decode_then_shuffle and shuffle_then_decode both
    # seemed to work for dense and sparse data.  sparse=1 looked faster, while
    # sparse=0 may win for large examples where few features are decoded and
    # can handle both sparse and dense input in one flow.
    reader = melt.decode_then_shuffle.inputs
    decoder = decode_example

    # Build the ops in a new graph that is installed as the default graph.
    with tf.Graph().as_default():
        example_id, features, labels, lengths = reader(
            sys.argv[1],
            decode=decoder,
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.num_epochs,
            num_threads=FLAGS.num_threads,
            dynamic_pad=True,
            batch_join=FLAGS.batch_join,
            bucket_boundaries=[
                int(field) for field in FLAGS.buckets.split(',') if field
            ],
            length_index=-1,
        )

        def run_step(sess, step):
            # A new list is built per call, exactly as the original lambda did.
            return read_once(
                sess, step, [example_id, features, labels, lengths])

        tf_flow(run_step)
コード例 #3
0
def read_records():
    """Build input ops with the configured shuffle/decode order and run ``tf_flow``.

    When ``FLAGS.shuffle_then_decode`` is truthy the reader is
    ``melt.shuffle_then_decode.inputs`` with ``decode_examples``; otherwise it
    is ``melt.decode_then_shuffle.inputs`` with ``decode_example``.  The chosen
    flag value is printed, and the input path is read from ``sys.argv[1]``.
    """
    reader, decoder = (
        (melt.shuffle_then_decode.inputs, decode_examples)
        if FLAGS.shuffle_then_decode
        else (melt.decode_then_shuffle.inputs, decode_example)
    )

    print('shuffle_then_decode?:', FLAGS.shuffle_then_decode)

    # Original author's notes (not verified here):
    # - Both settings used to work, with the sparse path being faster; decoding
    #   before shuffling may win for large examples where only a few features
    #   are decoded, and that flow can handle both sparse and dense input.
    # - With a newer TF version, data containing sparse features only worked
    #   with shuffle_then_decode; setting it to 0 raised
    #   "ValueError: All shapes must be fully defined: ...".
    # - shuffle_then_decode is faster, so it is the preferred setting.

    # Build the ops in a new graph that is installed as the default graph.
    with tf.Graph().as_default():
        batch_ops = reader(
            sys.argv[1],
            decode_fn=decoder,
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.num_epochs,
            num_threads=FLAGS.num_threads,
            batch_join=FLAGS.batch_join,
            shuffle_files=FLAGS.shuffle,
            fix_random=True,
        )

        def run_step(sess, step):
            return read_once(sess, step, batch_ops)

        tf_flow(run_step)
コード例 #4
0
ファイル: read-records.py プロジェクト: tangqiqi123/hasky
def read_records():
    """Read ``FLAGS.input`` through ``tf_flow`` and report timing.

    The reader and decoder come from ``input.get_decodes``.  Prints the created
    ops, then after ``tf_flow`` returns prints the module-level ``max_index``
    (this function does not assign it) and the elapsed time from ``Timer``.
    """
    reader, decoder = input.get_decodes(
        FLAGS.shuffle_then_decode, FLAGS.dynamic_batch_length)

    # Options the original author left switched off here: num_threads=1,
    # fix_random=True, fix_sequence=True, no_random=True.
    batch_ops = reader(
        FLAGS.input,
        decode=decoder,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.num_epochs,
        num_threads=FLAGS.num_threads,
        batch_join=FLAGS.batch_join,
        shuffle_batch=FLAGS.shuffle_batch,
        shuffle=FLAGS.shuffle,
        allow_smaller_final_batch=True,
    )
    print(batch_ops)

    stopwatch = Timer()

    def run_step(sess, step):
        return read_once(sess, step, batch_ops)

    tf_flow(run_step)
    print('max_index:', max_index)
    print(stopwatch.elapsed())
コード例 #5
0
def read_records():
    """Read ``FLAGS.input`` in fixed-size batches of 10 and report timing.

    The reader and decoder come from ``input.get_decodes()``.  Prints the
    created ops, then after ``tf_flow`` returns prints the module-level
    ``max_index`` (this function does not assign it) and the elapsed time
    from ``Timer``.
    """
    reader, decoder = input.get_decodes()

    # TODO (original author): a single thread looks faster, but more threads
    # may give better randomness?
    # The batch size is pinned to 10 instead of FLAGS.batch_size.  Options the
    # author left switched off: num_threads=1, fix_random=True, no_random=True.
    batch_ops = reader(
        FLAGS.input,
        decode_fn=decoder,
        batch_size=10,
        num_epochs=FLAGS.num_epochs,
        num_threads=FLAGS.num_threads,
        batch_join=FLAGS.batch_join,
        shuffle_batch=FLAGS.shuffle_batch,
        shuffle_files=FLAGS.shuffle_files,
        fix_sequence=True,
        allow_smaller_final_batch=True,
    )
    print(batch_ops)

    stopwatch = Timer()

    def run_step(sess, step):
        return read_once(sess, step, batch_ops)

    tf_flow(run_step)
    print('max_index:', max_index)
    print(stopwatch.elapsed())
コード例 #6
0
ファイル: train.py プロジェクト: tangqiqi123/hasky
def train():
    """Set up the seq2seq model, loss, optimizer and input ops, then run ``tf_flow``.

    Side effects: initialises ``vocabulary``, assigns the module globals
    ``vocab_size`` and ``criterion``, prints the training ops and, once
    ``tf_flow`` returns, the elapsed time from ``Timer``.
    """
    global vocab_size, criterion

    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size()

    def _make_criterion(num_tokens):
        # NLL loss with per-class weights of 1, except class 0 which gets
        # weight 0.  NOTE(review): index 0 is presumably padding -- confirm.
        class_weights = torch.ones(num_tokens)
        class_weights[0] = 0
        loss_fn = nn.NLLLoss(class_weights, size_average=False)
        if torch.cuda.is_available():
            loss_fn.cuda()
        return loss_fn

    criterion = _make_criterion(vocab_size)

    model = seq2seq.Seq2Seq(
        vocab_size,
        FLAGS.emb_dim,
        FLAGS.rnn_hidden_size,
        FLAGS.batch_size,
    )
    if torch.cuda.is_available():
        model.cuda()
    model.init_weights(0.08)

    optimizer = optim.Adagrad(model.parameters(), lr=FLAGS.learning_rate)

    reader, decoder = input.get_decodes(
        FLAGS.shuffle_then_decode, FLAGS.dynamic_batch_length)
    # Bind the options shared by the training and validation readers.
    make_ops = functools.partial(
        reader,
        decode=decoder,
        num_epochs=FLAGS.num_epochs,
        num_threads=FLAGS.num_threads,
        batch_join=FLAGS.batch_join,
        shuffle_batch=FLAGS.shuffle_batch,
        shuffle=FLAGS.shuffle,
        allow_smaller_final_batch=True,
    )

    train_ops = make_ops(FLAGS.input, batch_size=FLAGS.batch_size)
    print(train_ops)

    # Validation ops are only created when a validation input is configured.
    # (The original author also tried batch_size=FLAGS.batch_size*10 here.)
    valid_ops = (
        make_ops(FLAGS.valid_input, batch_size=FLAGS.batch_size)
        if FLAGS.valid_input
        else None
    )

    stopwatch = Timer()

    def run_step(sess, step):
        return process_once(
            sess, step, train_ops, valid_ops, model, optimizer)

    tf_flow(run_step)
    print(stopwatch.elapsed())
コード例 #7
0
def read_records():
    """Build libsvm input ops in a fresh graph and pass a step callback to ``tf_flow``.

    Reads the path in ``sys.argv[1]`` with ``melt.read_sparse.inputs`` using
    ``melt.libsvm_decode.decode`` as the decoder.
    """
    reader = melt.read_sparse.inputs
    decoder = melt.libsvm_decode.decode

    # Build the ops in a new graph that is installed as the default graph.
    with tf.Graph().as_default():
        features, labels = reader(
            sys.argv[1],
            decode=decoder,
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.num_epochs,
            num_preprocess_threads=FLAGS.num_preprocess_threads,
            batch_join=FLAGS.batch_join,
            shuffle=FLAGS.shuffle,
        )

        def run_step(sess, step):
            # A new list is built per call, exactly as the original lambda did.
            return read_once(sess, step, [features, labels])

        tf_flow(run_step)
コード例 #8
0
ファイル: read-records-melt.py プロジェクト: fword/hasky
def read_records():
    """Build libsvm input ops with a configurable label dtype and run ``tf_flow``.

    Labels are decoded as ``tf.int64`` when ``FLAGS.label_type == 'int'`` and
    as ``tf.float32`` otherwise.  The input path is read from ``sys.argv[1]``.
    """
    reader = melt.read_sparse.inputs
    if FLAGS.label_type == 'int':
        label_dtype = tf.int64
    else:
        label_dtype = tf.float32
    decoder = functools.partial(
        melt.libsvm_decode.decode, label_type=label_dtype)

    # Build the ops in a new graph that is installed as the default graph.
    with tf.Graph().as_default():
        features, labels = reader(
            sys.argv[1],
            decode=decoder,
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.num_epochs,
            num_preprocess_threads=FLAGS.num_preprocess_threads,
            batch_join=FLAGS.batch_join,
            shuffle=FLAGS.shuffle,
        )

        def run_step(sess, step):
            # A new list is built per call, exactly as the original lambda did.
            return read_once(sess, step, [features, labels])

        tf_flow(run_step)
コード例 #9
0
ファイル: read-records.py プロジェクト: fword/hasky
def read_records():
    """Read positive (and optionally negative) batches through ``tf_flow``.

    ``input.get_decodes`` supplies the reader plus separate decoders for the
    positive and negative streams.  Negative ops are only built when
    ``FLAGS.num_negs`` is truthy; they read ``FLAGS.input`` with a batch size
    of ``FLAGS.batch_size * FLAGS.num_negs`` and are passed through
    ``input.reshape_neg_tensors``.  Prints the positive ops, then after
    ``tf_flow`` returns prints the module-level ``max_index`` (this function
    does not assign it) and the elapsed time from ``Timer``.
    """
    reader, decoder, neg_decoder = input.get_decodes(
        FLAGS.shuffle_then_decode, FLAGS.dynamic_batch_length)

    # TODO (original author): a single thread looks faster, but more threads
    # may give better randomness?
    # Options the author left switched off: num_threads=1, fix_random=True,
    # fix_sequence=True, no_random=True.
    batch_ops = reader(
        FLAGS.input,
        decode=decoder,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.num_epochs,
        num_threads=FLAGS.num_threads,
        batch_join=FLAGS.batch_join,
        shuffle_batch=FLAGS.shuffle_batch,
        shuffle=FLAGS.shuffle,
        allow_smaller_final_batch=True,
    )
    print(batch_ops)

    negative_ops = None
    if FLAGS.num_negs:
        raw_negatives = reader(
            FLAGS.input,
            decode=neg_decoder,
            batch_size=FLAGS.batch_size * FLAGS.num_negs,
            num_epochs=FLAGS.num_epochs,
            num_threads=FLAGS.num_threads,
            batch_join=FLAGS.batch_join,
            shuffle=FLAGS.shuffle,
        )
        reshaped = input.reshape_neg_tensors(
            raw_negatives, FLAGS.batch_size, FLAGS.num_negs)
        negative_ops = list(reshaped)

    stopwatch = Timer()

    def run_step(sess, step):
        return read_once(sess, step, batch_ops, negative_ops)

    tf_flow(run_step)
    print('max_index:', max_index)
    print(stopwatch.elapsed())
コード例 #10
0
def read_records():
    """Build libsvm input ops with the configured shuffle/decode order and run ``tf_flow``.

    Labels are decoded as ``tf.int64`` when ``FLAGS.label_type == 'int'`` and
    as ``tf.float32`` otherwise.  ``FLAGS.shuffle_then_decode`` selects between
    ``melt.shuffle_then_decode.inputs`` with ``melt.libsvm_decode.decode`` and
    ``melt.decode_then_shuffle.inputs`` with ``decode_example``.  The input
    path is read from ``sys.argv[1]``.
    """
    if FLAGS.label_type == 'int':
        label_dtype = tf.int64
    else:
        label_dtype = tf.float32

    if FLAGS.shuffle_then_decode:
        reader = melt.shuffle_then_decode.inputs
        base_decoder = melt.libsvm_decode.decode
    else:
        reader = melt.decode_then_shuffle.inputs
        base_decoder = decode_example
    decoder = functools.partial(base_decoder, label_type=label_dtype)

    # Build the ops in a new graph that is installed as the default graph.
    with tf.Graph().as_default():
        # Original author's note (not verified here): dynamic_pad is only
        # being tested; it does not work for sparse data with
        # decode_then_shuffle.
        features, labels = reader(
            sys.argv[1],
            decode_fn=decoder,
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.num_epochs,
            num_threads=FLAGS.num_threads,
            batch_join=FLAGS.batch_join,
            shuffle_files=FLAGS.shuffle_files,
            dynamic_pad=FLAGS.dynamic_pad,
        )

        def run_step(sess, step):
            # A new list is built per call, exactly as the original lambda did.
            return read_once(sess, step, [features, labels])

        tf_flow(run_step)
コード例 #11
0
def read_records():
    """Build decode-then-shuffle input ops in a fresh graph and run ``tf_flow``.

    Reads the path in ``sys.argv[1]`` with ``melt.decode_then_shuffle.inputs``
    using ``decode_example`` as the decoder.
    """
    # Original author's notes (not verified here): only decode_then_shuffle is
    # usable because decoding relies on tf.parse_single_sequence_example.
    # TODO from the author: verify that sparse input works;
    # decode_then_shuffle and shuffle_then_decode both seemed to work for dense
    # and sparse data.  sparse=1 looked faster, while sparse=0 may win for
    # large examples where few features are decoded and can handle both sparse
    # and dense input in one flow.
    reader = melt.decode_then_shuffle.inputs
    decoder = decode_example

    # Build the ops in a new graph that is installed as the default graph.
    with tf.Graph().as_default():
        example_id, features, labels = reader(
            sys.argv[1],
            decode=decoder,
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.num_epochs,
            num_threads=FLAGS.num_threads,
            batch_join=FLAGS.batch_join,
            shuffle=FLAGS.shuffle,
        )

        def run_step(sess, step):
            # A new list is built per call, exactly as the original lambda did.
            return read_once(sess, step, [example_id, features, labels])

        tf_flow(run_step)
コード例 #12
0
def read_records():
    """Build input ops in a fresh graph and pass a step callback to ``tf_flow``.

    Relies on ``inputs`` and ``decode`` being defined outside this function;
    the input path is read from ``sys.argv[1]``.
    """
    # Build the ops in a new graph that is installed as the default graph.
    with tf.Graph().as_default():
        features, labels = inputs(
            sys.argv[1],
            decode=decode,
            batch_size=FLAGS.batch_size,
        )

        def run_step(sess, step):
            # A new list is built per call, exactly as the original lambda did.
            return read_once(sess, step, [features, labels])

        tf_flow(run_step)