Example #1
import csv
import os
import pickle

import numpy as np
import six
from chainer import cuda
from progressbar import ProgressBar

# get_images and twitterNet_worker are helpers defined elsewhere in this
# project.

def output_test(path, gpu_id, saved_path, regression, useImage, useDoc):
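    """Evaluate a saved model on the held-out test permutation.

    Loads labels, doc2vec features and a trained network from disk, runs
    the test split in mini-batches, and dumps a confusion matrix (plus a
    prediction/answer CSV in regression mode).
    """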
    gpu = gpu_id >= 0
    print("reg : ", regression)
    print("gpu : ", gpu_id)
    print("save path : ", saved_path)
    print("img : ", useImage)
    print("doc : ", useDoc)
    dir = os.path.dirname(saved_path)
    if gpu:
        cuda.check_cuda_available()

    print("loading labels..")
    if regression:
        labels = np.array(pickle.load(
            open(os.path.join(path, 'answers_RT2.pkl'), "rb")),
                          dtype=np.float32)
    else:
        labels = np.array(pickle.load(
            open(os.path.join(path, "answers.pkl"), "rb")),
                          dtype=np.int32)

    print("loading doc2vec model..")
    doc_vectors = np.array(pickle.load(
        (open(os.path.join(path, "corpus_features.pkl"), "rb"))),
                           dtype=np.float32)

    print("make NN model..")
    if regression:
        dim = 1
    else:
        dim = 4
    worker = twitterNet_worker(dim, [min(labels)])

    print("loading NN model..")
    worker.load(saved_path)
    if gpu:
        cuda.get_device(gpu_id).use()
        worker.model.to_gpu()

    perm = pickle.load(open(os.path.join(dir, 'test_perm.pkl'), "rb"))
    batchsize = 30
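    # 4x4 confusion matrix: catans[true_label][predicted_label]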
    catans = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
    coef = []

    p = ProgressBar(max_value=len(perm), min_value=1)
    pred = []
    ans = []
    for i in six.moves.range(0, len(perm), batchsize):
        p.update(i + 1)
        x_batch = get_images(perm[i:i + batchsize], path)
        x_batch_doc = doc_vectors[perm[i:i + batchsize]]
        y_batch = labels[perm[i:i + batchsize]]
        if regression:
            acoef, h, t = worker.test(x_batch,
                                      x_batch_doc,
                                      y_batch,
                                      regression,
                                      gpu=gpu)
            pred.extend(h)
            ans.extend(t)
            coef.append(acoef)
        else:
            # Assumes worker.test also returns (coef, scores, targets) in
            # classification mode, with per-sample class scores in h.
            acoef, h, t = worker.test(x_batch,
                                      x_batch_doc,
                                      y_batch,
                                      regression,
                                      gpu=gpu)
            for scores, answer in zip(h, y_batch):
                predicted = int(np.argmax(scores))
                catans[int(answer)][predicted] += 1

    if not regression:
        pickle.dump(catans, open(os.path.join(path, "catdic.pkl"), "wb"))
    p.finish()
    if regression:
        # Flatten in case the per-sample outputs are shape-(1,) arrays.
        pred = np.asarray(pred).ravel()
        ans = np.asarray(ans).ravel()
        corr = np.corrcoef(pred, ans)[0, 1]
        print("test corr:", corr)
        print("mean coef:", np.mean(coef))
        with open('some.csv', 'w') as f:
            writer = csv.writer(f, lineterminator='\n')
            for h, t in zip(pred, ans):
                writer.writerow([h, t])
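
A minimal sketch of how output_test might be driven from the command line. The flag names and defaults below are hypothetical; only the positional meaning of the arguments is taken from the function signature above.

if __name__ == '__main__':
    import argparse

    # Hypothetical CLI wrapper; adjust flag names to taste.
    parser = argparse.ArgumentParser(description='Evaluate a saved twitterNet model')
    parser.add_argument('path', help='directory holding the pickled dataset')
    parser.add_argument('saved_path', help='path to the saved model file')
    parser.add_argument('--gpu', type=int, default=-1, help='GPU id, -1 for CPU')
    parser.add_argument('--regression', action='store_true')
    parser.add_argument('--no-image', dest='use_image', action='store_false')
    parser.add_argument('--no-doc', dest='use_doc', action='store_false')
    args = parser.parse_args()

    output_test(args.path, args.gpu, args.saved_path,
                args.regression, args.use_image, args.use_doc)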
Example #2
import os
import pickle

import numpy as np
import six
from chainer import cuda
from progressbar import ProgressBar

# create_split_perm, get_images and twitterNet_worker are helpers defined
# elsewhere in this project.

def train_and_test(path, gpu_id, load_path, saved_path, regression, useImage,
                   useDoc, iter):
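    """Train twitterNet and periodically evaluate it on a held-out split.

    Loads labels and doc2vec features, builds (or reloads) train/test
    permutations, optionally resumes from a saved model, then trains for
    n_epoch epochs, logging scores and testing/saving every 5 epochs.
    """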
    train_test_rate = 0.2
    batchsize = 25
    n_epoch = iter
    gpu = gpu_id >= 0
    print("reg : ", regression)
    print("gpu : ", gpu_id)
    print("path : ", path)
    print("load path : ", load_path)
    print("save path : ", saved_path)
    print("img : ", useImage)
    print("doc : ", useDoc)
    print("iter : ", iter)
    print("loading data")
    if regression:
        labels = pickle.load(open(os.path.join(path, 'answers_RT.pkl'), "rb"))
    else:
        labels = pickle.load(open(os.path.join(path, "answers.pkl"), "rb"))
    doc_vectors = pickle.load(
        open(os.path.join(path, "corpus_features.pkl"), "rb"))
    doc_vectors = np.array(doc_vectors, dtype=np.float32)

    labels = list(map(int, labels))
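    # Labels are 0-indexed class ids, so the class count is max + 1.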
    label_max = max(labels) + 1

    # split train test data
    if saved_path == "":
        dir = ""
    else:
        dir = os.path.dirname(saved_path)
    print("dir is ", dir)
    if dir != "" and os.path.exists(os.path.join(
            dir, "train_perm.pkl")) and os.path.exists(
                os.path.join(dir, "test_perm.pkl")) and regression:
        train_perm = pickle.load(
            open(os.path.join(dir, "train_perm.pkl"), "rb"))
        test_perm = pickle.load(open(os.path.join(dir, "test_perm.pkl"), "rb"))
        print("loaded")
        N = len(train_perm)
        N_test = len(test_perm)
    else:
        print("normalarize")
        split_perm = create_split_perm(regression, labels, path)
        db_len = len(split_perm)
        N_test = int(train_test_rate * db_len)
        N = int(db_len - N_test)
        train_perm = split_perm[:N]
        test_perm = split_perm[N:]
        print("train:", len(train_perm))
        print("test:", len(test_perm))
        pickle.dump(train_perm,
                    open(os.path.join(load_path, "train_perm.pkl"), "wb"))
        pickle.dump(test_perm,
                    open(os.path.join(load_path, "test_perm.pkl"), "wb"))

    if regression:
        labels = np.array(labels, dtype=np.float32).reshape(len(labels), 1)
    else:
        labels = np.array(labels, dtype=np.int32)

    # make model
    print("make model")
    train_losses = []
    train_scores = []
    test_move = []
    if regression:
        worker = twitterNet_worker(1, min(labels))
    else:
        worker = twitterNet_worker(label_max, min(labels))
    if saved_path != "" and os.path.exists(saved_path):
        loaded_epoch = worker.load(saved_path)
        train_losses = pickle.load(
            open(os.path.join(load_path, "train_losses.pkl"),
                 "rb"))[:loaded_epoch - 1]
        train_scores = pickle.load(
            open(os.path.join(load_path, "train_scores.pkl"),
                 "rb"))[:loaded_epoch - 1]
        test_move = pickle.load(
            open(os.path.join(load_path, "test_score.pkl"),
                 "rb"))[:int((loaded_epoch - 1) / 5)]
        print(len(train_losses))
        print(len(train_scores))
        print(len(test_move))
    else:
        loaded_epoch = 1
    if gpu:
        cuda.get_device(gpu_id).use()
        worker.model.to_gpu()

    for epoch in six.moves.range(loaded_epoch, n_epoch + 1):
        print('epoch', epoch)
        # training
        perm = np.random.permutation(N)
        sum_loss = 0
        p = ProgressBar(max_value=N, min_value=1)
        myh = []  # predictions seen during training
        myt = []  # corresponding targets
        for i in six.moves.range(0, N, batchsize):
            p.update(i + 1)
            x_batch = get_images(train_perm[perm[i:i + batchsize]], path)
            x_batch_doc = doc_vectors[train_perm[perm[i:i + batchsize]]]
            y_batch = labels[train_perm[perm[i:i + batchsize]]]
            # Pass the loss function (Classifier defines it) and its arguments
            loss, h, t = worker.train(x_batch,
                                      x_batch_doc,
                                      y_batch,
                                      regression=regression,
                                      gpu=gpu,
                                      useImage=useImage,
                                      useDoc=useDoc)
            sum_loss += float(loss) * len(y_batch)
            myh.extend(list(h))
            myt.extend(list(t))
        p.finish()
        if regression:
            # Flatten: regression targets are shape (N, 1) after the reshape.
            score = np.corrcoef(np.array(myh).ravel(),
                                np.array(myt).ravel())[0, 1]
        else:
            score = np.mean(np.array(myh))
        print("train mean loss: %f" % (sum_loss / N))
        print("train mean score: %f" % score)
        train_losses.append(sum_loss / N)
        train_scores.append(score)
        pickle.dump(train_losses,
                    open(os.path.join(load_path, "train_losses.pkl"), "wb"))
        pickle.dump(train_scores,
                    open(os.path.join(load_path, "train_scores.pkl"), "wb"))
        # test
        if epoch % 5 == 0:
            sum_accuracy = [0.0] * label_max
            total_acc_elem = [0] * label_max
            myh = []  # predictions seen during testing
            myt = []  # corresponding targets
            p = ProgressBar(max_value=N_test, min_value=1)
            for i in range(0, N_test, batchsize):
                p.update(i + 1)
                x_batch = get_images(test_perm[i:i + batchsize], path)
                x_batch_doc = doc_vectors[test_perm[i:i + batchsize]]
                y_batch = labels[test_perm[i:i + batchsize]]

                if regression:
                    h, t = worker.test(x_batch,
                                       x_batch_doc,
                                       y_batch,
                                       regression=regression,
                                       gpu=gpu,
                                       useImage=useImage,
                                       useDoc=useDoc)
                    myh.extend(list(h))
                    myt.extend(list(t))
                else:
                    # compute accuracy separately for each label
                    acc = [0.0] * label_max
                    for label in range(label_max):
                        labeled_perm = [
                            x for x in np.arange(len(y_batch))
                            if y_batch[x] == label
                        ]
                        if len(labeled_perm) != 0:
                            acc[label] = worker.test(
                                x_batch[labeled_perm],
                                x_batch_doc[labeled_perm],
                                y_batch[labeled_perm],
                                regression=regression,
                                gpu=gpu,
                                useImage=useImage,
                                useDoc=useDoc) * len(labeled_perm)
                        total_acc_elem[label] += len(labeled_perm)
                    sum_accuracy = [
                        s + float(t) for s, t in zip(sum_accuracy, acc)
                    ]
            p.finish()
            if regression:
                corr = np.corrcoef(np.array(myh).ravel(),
                                   np.array(myt).ravel())[0, 1]
                print("test mean corr: %f" % (corr))
                test_move.append(corr)
                pickle.dump(
                    test_move,
                    open(os.path.join(load_path, "test_score.pkl"), "wb"))
            else:
                sum_accuracy = [
                    t / float(u) if u else 0.0
                    for t, u in zip(sum_accuracy, total_acc_elem)
                ]
                print("\n".join(map(str, sum_accuracy)))
                print("mean:", np.mean(sum_accuracy))
                test_move.append(np.mean(sum_accuracy))
                pickle.dump(
                    test_move,
                    open(os.path.join(load_path, "test_score.pkl"), "wb"))

        if epoch % 5 == 0:
            # Save the model and the optimizer
            print('save the model')
            worker.save(load_path, epoch)
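
train_and_test depends on a project-local create_split_perm helper that is not shown on this page. A minimal stand-in, assuming it only needs to return a shuffled index array (the real helper may stratify by label or filter examples):

def create_split_perm(regression, labels, path):
    # Hypothetical stand-in: a random permutation of all example indices.
    # The project's real implementation may stratify or filter instead.
    return np.random.permutation(len(labels))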