def test_train_predict3():
    '''
    Test that a model trained on sequences of one length can be used for predictions on other sequence lengths
    '''
    import tempfile
    sp = SequencePattern("sorted", in_seq_len=10, out_seq_len=10)
    tempdir = tempfile.mkdtemp()
    ts2s = TFLearnSeq2Seq(sp,
                          seq2seq_model="embedding_attention",
                          data_dir=tempdir,
                          name="attention")
    tf.reset_default_graph()
    ts2s.train(num_epochs=1,
               num_points=1000,
               weights_output_fn=1,
               weights_input_fn=0)
    assert os.path.exists(ts2s.weights_output_fn)

    tf.reset_default_graph()
    sp = SequencePattern("sorted", in_seq_len=20, out_seq_len=8)
    ts2s = TFLearnSeq2Seq(sp,
                          seq2seq_model="embedding_attention",
                          data_dir="DATA",
                          name="attention",
                          verbose=1)
    x = np.random.randint(0, 9, 20)
    prediction, y = ts2s.predict(x, weights_input_fn=1)
    assert len(prediction) == 8

    os.system("rm -rf %s" % tempdir)
def test_sp2():
    '''
    Test a SequencePattern instance with lengths different from the defaults
    '''
    sp = SequencePattern("sorted", in_seq_len=20, out_seq_len=5)
    x = np.random.randint(0, 9, 20)
    y = sp.generate_output_sequence(x)
    assert len(y) == 5
    y_exp = sorted(x)[:5]
    assert all(y == y_exp)
def test_train_predict2():
    '''
    Test that the embedding_attention model works, with saving and loading of weights
    '''
    import tempfile
    sp = SequencePattern()
    tempdir = tempfile.mkdtemp()
    ts2s = TFLearnSeq2Seq(sp,
                          seq2seq_model="embedding_attention",
                          data_dir=tempdir,
                          name="attention")
    tf.reset_default_graph()
    ts2s.train(num_epochs=1,
               num_points=1000,
               weights_output_fn=1,
               weights_input_fn=0)
    assert os.path.exists(ts2s.weights_output_fn)

    tf.reset_default_graph()
    ts2s = TFLearnSeq2Seq(sp,
                          seq2seq_model="embedding_attention",
                          data_dir="DATA",
                          name="attention",
                          verbose=1)
    prediction, y = ts2s.predict(Xin=range(10), weights_input_fn=1)
    assert len(prediction) == 10

    os.system("rm -rf %s" % tempdir)
def test_sp1():
    '''
    Test three different SequencePattern instances
    '''
    sp = SequencePattern("maxmin_dup")
    y = sp.generate_output_sequence(range(10))
    assert all(y == np.array([9, 0, 2, 3, 4, 5, 6, 7, 8, 9]))
    sp = SequencePattern("sorted")
    y = sp.generate_output_sequence([5, 6, 1, 2, 9])
    assert all(y == np.array([1, 2, 5, 6, 9]))
    sp = SequencePattern("reversed")
    y = sp.generate_output_sequence(range(10))
    assert all(y == np.array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]))
def test_predict1():
    '''
    Test simple predictions using the weights just produced (in test_train1)
    '''
    sp = SequencePattern()
    ts2s = TFLearnSeq2Seq(sp, verbose=1)
    wfn = "test_%s" % ts2s.canonical_weights_fn(0)
    print("using weights filename %s" % wfn)
    tf.reset_default_graph()
    prediction, y = ts2s.predict(Xin=range(10), weights_input_fn=wfn)
    assert len(prediction) == 10
def test_train1():
    '''
    Test simple training of an embedding_rnn seq2seq model
    '''
    sp = SequencePattern()
    ts2s = TFLearnSeq2Seq(sp)
    ofn = "test_%s" % ts2s.canonical_weights_fn(0)
    print("using weights filename %s" % ofn)
    if os.path.exists(ofn):
        os.unlink(ofn)
    tf.reset_default_graph()
    ts2s.train(num_epochs=1, num_points=10000, weights_output_fn=ofn)
    assert os.path.exists(ofn)
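# Hedged sketch: the CommandLine parser below uses a custom VAction class (with
# nargs=0 so that repeated -v flags raise the verbosity) that is not defined in
# this excerpt, and the code as a whole assumes module-level imports such as
# os, json, argparse, numpy as np, tensorflow as tf, plus the TFLearnSeq2Seq
# and SequencePattern classes.  A minimal counting action consistent with that
# usage follows; the project's actual implementation may differ.
import argparse


class VAction(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        # each occurrence of -v / --verbose bumps the verbosity count by one
        current = getattr(namespace, self.dest, None) or 0
        setattr(namespace, self.dest, current + 1)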
def CommandLine(args=None, arglist=None):
    '''
    Main command line.  Accepts args, to allow for simple unit testing.
    '''
    help_text = """
Commands:

train - give size of training set to use, as argument
predict - give input sequence as argument (or specify inputs via --from-file <filename>)

"""
    parser = argparse.ArgumentParser(
        description=help_text, formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("cmd", help="command")
    parser.add_argument("cmd_input", nargs='*', help="input to command")
    parser.add_argument(
        '-v',
        "--verbose",
        nargs=0,
        help="increase output verbosity (add more -v to increase versbosity)",
        action=VAction,
        dest='verbose')
    parser.add_argument(
        "-m",
        "--model",
        help=
        "seq2seq model name: either embedding_rnn (default) or embedding_attention",
        default=None)
    parser.add_argument("-r",
                        "--learning-rate",
                        type=float,
                        help="learning rate (default 0.0001)",
                        default=0.0001)
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        help="number of trainig epochs",
                        default=10)
    parser.add_argument("-i",
                        "--input-weights",
                        type=str,
                        help="tflearn file with network weights to load",
                        default=None)
    parser.add_argument(
        "-o",
        "--output-weights",
        type=str,
        help="new tflearn file where network weights are to be saved",
        default=None)
    parser.add_argument("-p",
                        "--pattern-name",
                        type=str,
                        help="name of pattern to use for sequence",
                        default=None)
    parser.add_argument(
        "-n",
        "--name",
        type=str,
        help="name of model, used when generating default weights filenames",
        default=None)
    parser.add_argument("--in-len",
                        type=int,
                        help="input sequence length (default 10)",
                        default=None)
    parser.add_argument("--out-len",
                        type=int,
                        help="output sequence length (default 10)",
                        default=None)
    parser.add_argument(
        "--from-file",
        type=str,
        help="name of file to take input data sequences from (json format)",
        default=None)
    parser.add_argument(
        "--iter-num",
        type=int,
        help=
        "training iteration number; specify instead of input- or output-weights to use generated filenames",
        default=None)
    parser.add_argument(
        "--data-dir",
        help=
        "directory to use for storing checkpoints (also used when generating default weights filenames)",
        default=None)
    # model parameters
    parser.add_argument(
        "-L",
        "--num-layers",
        type=int,
        help="number of RNN layers to use in the model (default 1)",
        default=1)
    parser.add_argument("--cell-size",
                        type=int,
                        help="size of RNN cell to use (default 32)",
                        default=32)
    parser.add_argument("--cell-type",
                        type=str,
                        help="type of RNN cell to use (default BasicLSTMCell)",
                        default="BasicLSTMCell")
    parser.add_argument("--embedding-size",
                        type=int,
                        help="size of embedding to use (default 20)",
                        default=20)
    parser.add_argument("--tensorboard-verbose",
                        type=int,
                        help="tensorboard verbosity level (default 0)",
                        default=0)

    if not args:
        args = parser.parse_args(arglist)

    if args.iter_num is not None:
        args.input_weights = args.iter_num
        args.output_weights = args.iter_num + 1

    model_params = dict(
        num_layers=args.num_layers,
        cell_size=args.cell_size,
        cell_type=args.cell_type,
        embedding_size=args.embedding_size,
        learning_rate=args.learning_rate,
        tensorboard_verbose=args.tensorboard_verbose,
    )

    if args.cmd == "train":
        try:
            num_points = int(args.cmd_input[0])
        except (IndexError, ValueError):
            raise Exception(
                "Please specify the number of datapoints to use for training, as the first argument"
            )
        sp = SequencePattern(args.pattern_name,
                             in_seq_len=args.in_len,
                             out_seq_len=args.out_len)
        ts2s = TFLearnSeq2Seq(sp,
                              seq2seq_model=args.model,
                              data_dir=args.data_dir,
                              name=args.name,
                              verbose=args.verbose)
        ts2s.train(num_epochs=args.epochs,
                   num_points=num_points,
                   weights_output_fn=args.output_weights,
                   weights_input_fn=args.input_weights,
                   model_params=model_params)
        return ts2s

    elif args.cmd == "predict":
        if args.from_file:
            with open(args.from_file) as ifp:
                inputs = json.load(ifp)
        else:
            try:
                input_x = list(map(int, args.cmd_input))
                inputs = [input_x]
            except ValueError:
                raise Exception(
                    "Please provide a space-delimited input sequence as the argument"
                )

        sp = SequencePattern(args.pattern_name,
                             in_seq_len=args.in_len,
                             out_seq_len=args.out_len)
        ts2s = TFLearnSeq2Seq(sp,
                              seq2seq_model=args.model,
                              data_dir=args.data_dir,
                              name=args.name,
                              verbose=args.verbose)
        results = []
        for x in inputs:
            prediction, y = ts2s.predict(x,
                                         weights_input_fn=args.input_weights,
                                         model_params=model_params)
            print("==> For input %s, prediction=%s (expected=%s)" %
                  (x, prediction, sp.generate_output_sequence(x)))
            results.append([prediction, y])
        ts2s.prediction_results = results
        return ts2s

    else:
        print("Unknown command %s" % args.cmd)
def CommandLine(args=None, arglist=None):
    '''
    Main command line.  Accepts args, to allow for simple unit testing.
    '''
    help_text = """
Commands:

train - give size of training set to use, as argument
predict - give input sequence as argument (or specify inputs via --from-file <filename>)

"""
    parser = argparse.ArgumentParser(
        description=help_text, formatter_class=argparse.RawTextHelpFormatter)

    #parser.add_argument("cmd", help="command")
    #parser.add_argument("cmd_input", nargs='*', help="input to command")
    #parser.add_argument('-v', "--verbose", nargs=0, help="increase output verbosity (add more -v to increase versbosity)", action=VAction, dest='verbose')
    #parser.add_argument("-m", "--model", help="seq2seq model name: either embedding_rnn (default) or embedding_attention", default=None)
    #parser.add_argument("-r", "--learning-rate", type=float, help="learning rate (default 0.0001)", default=0.0001)
    #parser.add_argument("-e", "--epochs", type=int, help="number of trainig epochs", default=10)
    #parser.add_argument("-i", "--input-weights", type=str, help="tflearn file with network weights to load", default=None)
    #parser.add_argument("-o", "--output-weights", type=str, help="new tflearn file where network weights are to be saved", default=None)
    #parser.add_argument("-p", "--pattern-name", type=str, help="name of pattern to use for sequence", default=None)
    #parser.add_argument("-n", "--name", type=str, help="name of model, used when generating default weights filenames", default=None)
    #parser.add_argument("--in-len", type=int, help="input sequence length (default 10)", default=None)
    #parser.add_argument("--out-len", type=int, help="output sequence length (default 10)", default=None)
    #parser.add_argument("--from-file", type=str, help="name of file to take input data sequences from (json format)", default=None)
    parser.add_argument(
        "--iter-num",
        type=int,
        help=
        "training iteration number; specify instead of input- or output-weights to use generated filenames",
        default=None)
    #parser.add_argument("--data-dir", help="directory to use for storing checkpoints (also used when generating default weights filenames)", default=None)
    # model parameters
    #parser.add_argument("-L", "--num-layers", type=int, help="number of RNN layers to use in the model (default 1)", default=1)
    #parser.add_argument("--cell-size", type=int, help="size of RNN cell to use (default 32)", default=32)
    #parser.add_argument("--cell-type", type=str, help="type of RNN cell to use (default BasicLSTMCell)", default="BasicLSTMCell")
    #parser.add_argument("--embedding-size", type=int, help="size of embedding to use (default 20)", default=20)
    #parser.add_argument("--tensorboard-verbose", type=int, help="tensorboard verbosity level (default 0)", default=0)

    if not args:
        args = parser.parse_args(arglist)

    p_num_layers = 1
    p_cell_size = 32
    p_cell_type = 'BasicLSTMCell'
    p_embedding_size = 32
    p_learning_rate = 0.0001
    operation = "train"
    p_train_data_size = 10000
    p_pattern_name = "sorted"
    p_in_len = 32
    p_out_len = 32
    p_model = "embedding_rnn"
    p_data_dir = "models"
    p_name = "test1"
    p_epochs = 50
    #p_input_weights = "/share/users/bsamadi/seq2seq/tflearn_seq2seq/sort_32_orig_input"
    p_input_weights = None
    #p_ouput_weights = "test_hame_yek"
    #p_ouput_weights = "sort_256"
    #p_ouput_weights = "sort_256_orig_input"
    #p_ouput_weights = ""
    p_ouput_weights = "orig_32"
    #p_ouput_weights = "try_on_hist"
    #p_ouput_weights = None
    A = np.load("input_histograms_32.npy").astype(np.uint32)
    B = np.load("output_histograms_32.npy").astype(np.uint32)
    max_input = np.max(A)
    max_output = np.max(A)

    #max_input = 3000
    #max_output = 3000

    if args.iter_num is not None:
        p_input_weights = args.iter_num
        p_output_weights = args.iter_num + 1

    model_params = dict(
        num_layers=p_num_layers,
        cell_size=p_cell_size,
        cell_type=p_cell_type,
        embedding_size=p_embedding_size,
        learning_rate=p_learning_rate,
    )

    if operation == "train":
        num_points = p_train_data_size
        sp = SequencePattern(p_pattern_name,
                             in_seq_len=p_in_len,
                             out_seq_len=p_out_len,
                             max_input=max_input,
                             max_output=max_output)
        ts2s = TFLearnSeq2Seq(sp,
                              seq2seq_model=p_model,
                              data_dir=p_data_dir,
                              name=p_name)
        ts2s.train(num_epochs=p_epochs,
                   num_points=num_points,
                   weights_output_fn=p_output_weights,
                   weights_input_fn=p_input_weights,
                   model_params=model_params,
                   batch_size=8)
        return ts2s

    elif operation == "predict":
        A = np.load("input_histograms_32.npy").astype(np.uint32)
        A = A[0:1, :]
        A = np.array(A)
        print(A)

        inputs = A
        print(inputs)

        sp = SequencePattern(p_pattern_name,
                             in_seq_len=p_in_len,
                             out_seq_len=p_out_len,
                             max_input=max_input,
                             max_output=max_output)
        ts2s = TFLearnSeq2Seq(sp,
                              seq2seq_model=p_model,
                              data_dir=p_data_dir,
                              name=p_name)
        results = []
        print("inputs", inputs, A)

        for x in inputs:
            prediction, y = ts2s.predict(x,
                                         weights_input_fn=p_input_weights,
                                         model_params=model_params)
            #print("==> For input %s, prediction=%s (expected=%s)" % (x, prediction, sp.generate_output_sequence(x)))
            results.append([prediction])
        print(results)
        exit()
        ts2s.prediction_results = results
        return ts2s

    else:
        print("Unknown command %s" % args.cmd)