Code example #1
# Assumes the project's Model (e.g. model3.Model3) and DataParser classes have already been imported.
def ComputePrecisionK(modelfile, testfile, K_list):

    maxParagraphLength = 10
    maxParagraphs = 4
    #nlabels=1001
    #vocabularySize=76391
    labels = 8
    vocabularySize = 244
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    print(testfile)
    testing.getDataFromfile(testfile)
    print("data loading done")
    print("no of test examples: " + str(testing.totalPages))

    model.load(modelfile)

    print("model loading done")

    batchSize = 1

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    precAtK = {}
    for itr in K_list:
        precAtK[itr] = 0

    for i, v in enumerate(pred):
        temp = [(labId, labProb) for labId, labProb in enumerate(v)]
        #     print(temp)
        temp = sorted(temp, key=lambda x: x[1], reverse=True)
        for ele in K_list:
            pBag = 0
            for itr in range(ele):
                if truePre[i][0][temp[itr][0]] == 1:
                    pBag += 1
        #         print(float(pBag)/float(ele))
            precAtK[ele] += float(pBag) / float(ele)

    f = open("results/precAtK_model3_n", "w")
    for key in sorted(precAtK.keys()):
        #     print(key, precAtK[key]/len(pred))
        print(precAtK[key] / len(pred))
        f.write(str(key) + "\t" + str(precAtK[key] / len(pred)) + "\n")
    f.close()
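
A hypothetical call, for illustration only (the checkpoint name follows the saving pattern of code example #3 below; the test-file name and K values are assumptions, not taken from the project):

ComputePrecisionK("models/model3_reuter_100",
                  "reuters_sparse_test.txt",
                  K_list=[1, 3, 5])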
Code example #2
    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Model3(self._num_users,
                           self._num_items,
                           self.model_args).to(self._device)

        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)
Code example #3
from model3 import Model3 as Model
# DataParser is assumed to be imported from the project's data-parsing module.

maxParagraphLength = 100
maxParagraphs = 1
#nlabels=1001
#vocabularySize=76391
nlabels = 8
vocabularySize = 244
training = DataParser(maxParagraphLength, maxParagraphs, nlabels,
                      vocabularySize)
#training.getDataFromfile("data/wiki_fea_76390_Label_1000_train")
training.getDataFromfile(
    "C:/gitrepo/Wiki-Text-Categorization/Distant Supervision/Reuter_dataset/reuters_sparse_training.txt"
)

model = Model(maxParagraphLength, maxParagraphs, nlabels, vocabularySize)

batchSize = 64

epoch = 0
epochEnd = 105
for e in range(epoch, epochEnd):
    print('Epoch: ' + str(e + 1))
    cost = 0
    for itr in range(int(training.totalPages / batchSize)):
        cost += model.train(training.nextBatch(batchSize))
    print(str(cost / training.totalPages))

    if (e + 1) % 10 == 0 and e > 60:
        print('saving model..')
        model.save("models/model3_reuter_" + str(e + 1))
Code example #4
File: train.py  Project: gandhi-21/mnist__ensemble
# Make model 1 (the excerpt starts mid-block; this accuracy op presumably mirrors
# those for models 2 and 3 below)
accuracy1 = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(predictions1, 1), tf.argmax(model1.Y, 1)),
            tf.float32))

# Make model 2
model2 = Model2(X2, Y2, keep_prob2)
logits2, predictions2 = model2.build()
loss_op2 = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=model2.Y2))
train_op2 = tf.train.AdamOptimizer(
    learning_rate=model2.learning_rate).minimize(loss_op2)
accuracy2 = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(predictions2, 1), tf.argmax(model2.Y2, 1)),
            tf.float32))

# Make model 3
model3 = Model3(X3, Y3, keep_prob3)
logits3, predictions3 = model3.build()
loss_op3 = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits3, labels=model3.Y3))
train_op3 = tf.train.AdamOptimizer(
    learning_rate=model3.learning_rate).minimize(loss_op3)
accuracy3 = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(predictions3, 1), tf.argmax(model3.Y3, 1)),
            tf.float32))

# Make model 4
model4 = Model4(logitse1, logitse2, Y4)
logits4, predictions4 = model4.build()
loss_op4 = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits4, labels=model4.Y4))
# The truncated call presumably continues as for the other models:
train_op4 = tf.train.AdamOptimizer(
    learning_rate=model4.learning_rate).minimize(loss_op4)
Code example #5
# Assumes sys, numpy (as np), sklearn.metrics' f1_score and confusion_matrix, and the
# project's Model, DataParser and thresholdTuning helpers have already been imported.
def ComputeFscore(modelfile, testfile, outputfile):
    maxParagraphLength = int(sys.argv[1])
    maxParagraphs = int(sys.argv[2])
    filterSizes = [int(i) for i in sys.argv[3].split("-")]
    num_filters = int(sys.argv[4])
    wordEmbeddingDimension = int(sys.argv[5])
    # batchSize= int(sys.argv[6])
    # epochs= int(sys.argv[7])
    # folder_name = sys.argv[8]
    # output = sys.argv[9]
    lrate = float(sys.argv[10])
    poolLength = int(sys.argv[11])

    labels = 8
    vocabularySize = 244

    model = Model(maxParagraphs, maxParagraphLength, labels, vocabularySize,
                  filterSizes, num_filters, poolLength, wordEmbeddingDimension,
                  lrate)

    testing = DataParser(maxParagraphs, maxParagraphLength, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    labelsCount = {}
    ConfusionMa = {}
    fScr = {}

    thres = 0.5
    valid = int(
        len(truePre) * 0.5
    )  # using the first 50% of the data for threshold tuning - test and cv files were merged
    thresLab = {}
    for la in range(labels):
        if la % 25 == 0:
            print("Current label", la)
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)

        t = []
        p = []
        for i in range(valid, len(truePre)):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])

        p = np.array(p)
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p > bestThre)
        thresLab[la] = bestThre

    f = open(outputfile, "a")
    output = sys.argv[9]

    sum_fscore = 0.0
    for i in range(labels):
        sum_fscore = sum_fscore + fScr[i]
        output = output + "," + str(fScr[i])
    output += "," + str(sum_fscore / float(labels - 1))
    print("Fscore at " + sys.argv[7] + " epochs: " +
          str(sum_fscore / float(labels - 1)))
    f.write(output + "\n")
    f.close()
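
The thresholdTuning helper called above is not defined on this page. A minimal sketch, assuming it sweeps candidate thresholds over the validation slice and returns the best F1 score together with the threshold that achieves it:

import numpy as np
from sklearn.metrics import f1_score

def thresholdTuning(t, p):
    # Try a grid of thresholds and keep the one giving the best F1 score.
    t, p = np.asarray(t), np.asarray(p)
    bestF, bestThre = 0.0, 0.5
    for thre in np.arange(0.05, 1.0, 0.05):
        f = f1_score(t, p >= thre)
        if f > bestF:
            bestF, bestThre = f, thre
    return bestF, bestThre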
Code example #6
# Same dependencies as code example #5 (numpy, sklearn metrics, and the project's
# Model, DataParser and thresholdTuning).
def ComputeFscore(modelfile, testfile, outputfile):
    maxParagraphLength = 20
    maxParagraphs = 10
    #nlabels=1001
    #vocabularySize=76391
    labels = 8
    vocabularySize = 244
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    labelsCount = {}
    ConfusionMa = {}
    fScr = {}

    thres = 0.5
    valid = int(
        len(truePre) * 0.5
    )  # using the first 50% of the data for threshold tuning - test and cv files were merged
    thresLab = {}
    for la in range(labels):
        if la % 25 == 0:
            print("Current label", la)
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)

        t = []
        p = []
        for i in range(valid, len(truePre)):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])

        p = np.array(p)
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p > bestThre)
        thresLab[la] = bestThre

    f = open(outputfile, "w")
    sum_fscore = 0.0
    for i in range(labels):

        sum_fscore = sum_fscore + fScr[i]
        inp = str(i) + "," + str(thresLab[i]) + "," + str(fScr[i]) + "\n"
        f.write(inp)
    f.write(str(sum_fscore / float(labels - 1)))

    print(sum_fscore)
    print(sum_fscore / float((labels - 1)))
    f.close()
    return (sum_fscore / float((labels - 1)))
Code example #7
# Excerpt starts mid-script: maxParagraphs, paragraphLength and filterSizes are
# presumably parsed from sys.argv[1..3], as in code example #5.
num_filters = int(sys.argv[4])
wordEmbeddingDimension = int(sys.argv[5])
batchSize = int(sys.argv[6])
epochEnd = int(sys.argv[7])
folder_name = sys.argv[8]
lrate = float(sys.argv[9])
poolLength = int(sys.argv[10])
nlabels = 10
vocabularySize = 101940

training = DataParser(maxParagraphs, paragraphLength, nlabels, vocabularySize)
training.getDataFromfile(
    "../dataset/preprocessed_data/toplabels_split/wiki10-top10labels_train.txt"
)
model = Model(maxParagraphs, paragraphLength, nlabels, vocabularySize,
              filterSizes, num_filters, poolLength, wordEmbeddingDimension,
              lrate)

costfile = open("results/costfile.txt", "a")
output = folder_name

epoch = 0
# epochEnd=400
costepochs = []

for e in range(epoch, epochEnd):

    cost = 0

    for itr in range(int(training.totalPages / batchSize)):
        cost += model.train(training.nextBatch(batchSize))