Example #1
import argparse

import tensorflow as tf

# build_dict and build_dataset come from this project's data utilities
# (module name assumed here; the snippet starts mid-file and omits its imports).
from data_utils import build_dict, build_dataset


def add_arguments(parser):
    # args.test_tsv is referenced below; its definition was cut from the
    # snippet, so this argument is a reconstructed placeholder.
    parser.add_argument("--test_tsv",
                        type=str,
                        help="Path to the test TSV file.")
    parser.add_argument("--checkpoint_dir",
                        type=str,
                        default="saved_model",
                        help="Checkpoint dir for saved model.")
    parser.add_argument("--batch_size",
                        type=int,
                        default=24,
                        help="Batch size.")


parser = argparse.ArgumentParser()
add_arguments(parser)
args = parser.parse_args()

print("Loading dictionary...")
word_dict, reversed_dict, document_max_len = build_dict(args.test_tsv,
                                                        is_train=False)
print("Building test dataset...")
test_x, test_y = build_dataset(args.test_tsv, word_dict, document_max_len)

checkpoint_file = tf.train.latest_checkpoint(args.checkpoint_dir)

# File for saving the predicted values

nameHandle = open('y_pred.txt', 'w')

graph = tf.Graph()
with graph.as_default():
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)
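        # The snippet breaks off after the restore. A plausible continuation,
        # assuming the saved graph exposes input and prediction tensors under
        # the names used below (these tensor names are assumptions, not taken
        # from the checkpoint): fetch them from the restored graph, run the
        # test set through, and write the predicted labels to y_pred.txt.
        x = graph.get_tensor_by_name("x:0")
        is_training = graph.get_tensor_by_name("is_training:0")
        predictions = graph.get_tensor_by_name("output/predictions:0")

        pred = sess.run(predictions,
                        feed_dict={x: test_x, is_training: False})
        for label in pred:
            nameHandle.write(str(label) + "\n")
        nameHandle.close()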

# Library-adoption tracking (a separate fragment): log an adoption event when
# a user commits a library that already appears, more recently than their
# subscription, in a repo they follow.
if SIGHT:
    # new adoption event, init "source" field as an empty list
    adopt = dict()
    adopt["source"] = list()

    # check all potential source repos the user is subscribed to;
    # if the most recent commit of the library to a repo came after the user
    # subscribed, log it as an adoption event
    for r in user_repos[user]:
        # if the repo has the library and the library's most recent commit
        # came after the user subscribed, it is an adoption
        if (lib in repo_imports[r]
                and repo_imports[r][lib]["time"] > repo_users[r][user]):
            # save the adoption: add the repo commit to the source list
            adopt["source"].append(
                data.build_dict(repo_imports[r][lib]["user"], r,
                                repo_imports[r][lib]["time"]))

    # if the adoption has valid sources, set up "target" and save
    if len(adopt["source"]) != 0:
        # set up the target data
        adopt["target"] = data.build_dict(user, repo, time)
        # save the adoption event to the list
        adoption_events[lib].append(adopt)
        adoption_count += 1

    # adoption or not, update the data structures to reflect this commit
    user_quiver[user].add(lib)  # user has used lib, add it to their quiver
    # update the most recent commit of this library in the repo
    repo_imports[repo][lib]["user"] = user
    repo_imports[repo][lib]["time"] = time
    # add user to the repo's user list, tracking when they first "joined"
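
# data.build_dict is not defined in this fragment; from its use above it
# presumably just packs a (user, repo, time) triple into a record. A minimal
# stand-in sketch (name and shape assumed, not taken from the original code):
def _adoption_record(user, repo, time):
    """Hypothetical equivalent of data.build_dict as used above."""
    return {"user": user, "repo": repo, "time": time}
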
import tensorflow as tf
import pickle
from model import Model
from data_utils import build_dict, build_dataset, batch_iter
from train import hyper_params_path, word2index_path, seq2seq_model_dir

with open(hyper_params_path, "rb") as f:
    args = pickle.load(f)

print("Loading dictionary...")
word_dict, reversed_dict, article_list, _ = build_dict(
    word2index_path=word2index_path)
print("Loading validation dataset...")
valid_x = build_dataset(word_dict, article_list, args.article_max_len)

with tf.Session() as sess:
    print("Loading saved model...")
    model = Model(word_dict, args, train=False)
    saver = tf.train.Saver(tf.global_variables())
    ckpt = tf.train.get_checkpoint_state(seq2seq_model_dir)
    saver.restore(sess, ckpt.model_checkpoint_path)

    batches = batch_iter(valid_x, [0] * len(valid_x), args.batch_size, 1)

    print("Writing summaries to 'result.txt'...")
    for batch_x, _ in batches:
        batch_x_len = [len([y for y in x if y != 0]) for x in batch_x]

        valid_feed_dict = {
            model.batch_size: len(batch_x),
            model.X: batch_x,
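            # (The snippet is cut off mid-dict; a plausible completion follows.
            # model.X_len and model.prediction are assumed attribute names, and
            # prediction is assumed to be a [batch, time] array of word ids.)
            model.X_len: batch_x_len,
        }

        prediction = sess.run(model.prediction, feed_dict=valid_feed_dict)

        # map the predicted word ids back to tokens and append them to the
        # result file mentioned in the print above
        with open("result.txt", "a") as f:
            for ids in prediction:
                words = [reversed_dict[int(i)] for i in ids
                         if int(i) in reversed_dict and int(i) > 0]
                f.write(" ".join(words) + "\n")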
Example #4
import argparse
import os

from sklearn.model_selection import train_test_split

# build_dict and build_dataset come from this project's data utilities
# (module name assumed; the snippet starts mid-file and omits its imports).
from data_utils import build_dict, build_dataset


def add_arguments(parser):
    # args.train_tsv is referenced below; its definition was cut from the
    # snippet, so this argument is a reconstructed placeholder.
    parser.add_argument("--train_tsv",
                        type=str,
                        help="Path to the training TSV file.")
    parser.add_argument("--checkpoint_dir",
                        type=str,
                        default="saved_model",
                        help="Checkpoint directory.")


parser = argparse.ArgumentParser()
add_arguments(parser)
args = parser.parse_args()

num_class = 3
if not os.path.exists(args.checkpoint_dir):
    os.mkdir(args.checkpoint_dir)

print("Building dictionary...")
word_dict, reversed_dict, document_max_len = build_dict(args.train_tsv)
print("Building dataset...")
x, y = build_dataset(args.train_tsv, word_dict, document_max_len)
# Split to train and validation data
train_x, valid_x, train_y, valid_y = train_test_split(x,
                                                      y,
                                                      test_size=0.10,
                                                      random_state=42)

#train_x, train_y = build_dataset(args.train_tsv, word_dict, document_max_len)

#print("Building validation dictionary...")
#
#valid_tsv = 'data/lstm_single/africell_calls/dev_data.tsv'
#word_dict_valid, reversed_dict, document_max_len_valid = build_dict(valid_tsv)
#print("Building validation dataset...")

    # (Assumed guard, cut from this snippet: create the hyper-parameters
    # directory only if it does not already exist.)
    if not os.path.exists(os.path.dirname(hyper_params_path)):
        os.makedirs(os.path.dirname(hyper_params_path))
        with open(hyper_params_path, "wb") as f:
            pickle.dump(args, f)

    if not os.path.exists(seq2seq_model_dir):
        os.mkdir(seq2seq_model_dir)
    elif args.with_model:
        # Resume from the newest checkpoint listed in TensorFlow's
        # 'checkpoint' index file inside seq2seq_model_dir.
        with open(seq2seq_model_dir + 'checkpoint', 'r') as ckpt_index:
            ckpt_name = ckpt_index.read().splitlines()[0].split('"')[1]
        pre_model_checkpoint = seq2seq_model_dir + ckpt_name
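        # Note (not in the original snippet): when the checkpoint index is in
        # TensorFlow's standard format, the same path can also be obtained with
        # pre_model_checkpoint = tf.train.latest_checkpoint(seq2seq_model_dir)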

    print("Building dictionary...")
    word_dict, reversed_dict, article_list, headline_list = build_dict(
        train=True, word2index_path=word2index_path)
    print("Loading training dataset...")
    train_x, train_y = build_dataset(word_dict,
                                     article_list,
                                     args.article_max_len,
                                     headline_list=headline_list,
                                     headline_max_len=args.headline_max_len,
                                     train=True)

    with tf.Session() as sess:
        model = Model(word_dict, args)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        if 'pre_model_checkpoint' in globals():
            print("Continuing training from pre-trained model:",
                  pre_model_checkpoint, "......")
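            # (Assumed continuation, not in the original snippet: restore the
            # pre-trained weights before training resumes.)
            saver.restore(sess, pre_model_checkpoint)

        # A minimal sketch of the training loop that would typically follow,
        # reusing batch_iter as in the inference snippet above; args.num_epochs
        # and the per-batch feed/ops depend on Model and are assumptions here.
        batches = batch_iter(train_x, train_y, args.batch_size,
                             args.num_epochs)
        for batch_x, batch_y in batches:
            pass  # build the feed dict and run the model's train op here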