Example #1
# Imports assumed for this snippet (the DataParser/Model pair follows Example #7).
import math
import os
import sys

from DataParser import DataParser
from model2 import Model2 as Model


def ComputePrecisionK(modelfile, testfile, K_list):

    # Make the WikiCategoryLabelling sources importable from the working dir.
    CURRENT_DIR = os.path.dirname(os.path.abspath("./WikiCategoryLabelling/"))
    sys.path.append(os.path.dirname(CURRENT_DIR + "/WikiCategoryLabelling/"))

    maxParagraphLength = 250
    maxParagraphs = 10
    labels = 1001
    vocabularySize = 76390
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)
    print("data loading done")
    print("no of test examples: " + str(testing.totalPages))

    model.load(modelfile)

    print("model loading done")

    batchSize = 10

    testing.restore()
    truePre = []
    pred = []
    # Iterate over full batches, then one final partial batch if needed.
    for i in range(math.ceil(testing.totalPages / batchSize)):
        if i < testing.totalPages // batchSize:
            data = testing.nextBatch(batchSize)
        else:
            data = testing.nextBatch(testing.totalPages % batchSize)
        truePre.extend(data[0])
        pre = model.predict(data)
        pred.extend(pre[0].tolist())

    avgPrecK = [0] * len(K_list)
    for i, p in enumerate(pred):
        # Rank label indices by predicted score, descending.
        sortedL = sorted(range(len(p)), key=p.__getitem__, reverse=True)
        for k, K in enumerate(K_list):
            labelK = sortedL[:K]
            precK = 0
            for l in labelK:
                if truePre[i][l] == 1:
                    precK += 1
            avgPrecK[k] += precK / float(K)
    avgPrecK = [float(a) / len(pred) for a in avgPrecK]

    for K, p in zip(K_list, avgPrecK):
        print("P@%d: %f" % (K, p))
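
A minimal invocation sketch; the checkpoint and test-set paths below are hypothetical placeholders:

# Hypothetical paths; K_list holds the precision@K cutoffs to report.
ComputePrecisionK("model2_l1000_9", "data/vocab_test.txt", K_list=[1, 3, 5])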
Example #2
# Imports assumed for this snippet; date2ts, HistoricalFeed, Feed and Model2
# are project-local helpers.
import sys
from datetime import datetime
from getopt import getopt

import psycopg2


def main():
    FROM = datetime.utcnow().replace(hour=0, minute=0, second=0,
                                     microsecond=0).timestamp()
    TO = datetime.utcnow().timestamp()
    PAIR = 'BTCEUR'

    db = psycopg2.connect('dbname=johnny5 user=johnny5')
    cur = db.cursor()

    opts, args = getopt(sys.argv[1:], 'f:lp:t:')

    for o, a in opts:
        if o == '-f':
            FROM = date2ts(a)
        elif o == '-p':
            PAIR = a
        elif o == '-t':
            TO = date2ts(a)
        elif o == '-l':
            cur.execute('SELECT MIN(ts) FROM historical')
            FROM = cur.fetchone()[0]
        else:
            raise Exception("Unknown option: %s" % o)

    cur.execute('SELECT id, kname FROM pairs WHERE name=%s', (PAIR, ))
    pair_id, KPAIR = cur.fetchone()

    df = open('plot.data', 'wt')
    for pct_high in range(1, 50):
        for pct_low in range(1, 50):

            print()
            print("pct_low=%d, pct_high=%d" % (pct_low, pct_high))
            f = HistoricalFeed(db, pair_id, FROM, 600 * 2)
            m = Model2(db,
                       pair_id,
                       6 * 3600,
                       600 * 2,
                       pct_low=pct_low,
                       pct_high=pct_high)

            while True:
                try:
                    ts, value = f.next()
                except Feed.NoMore:
                    break
                if ts >= TO:
                    break
                action = m.newpoint(ts, value)
            df.write('%d %d %0.2f\n' %
                     (pct_high, pct_low, m.total_balance(value)))
            sys.stdout.flush()

        df.write('\n')

    f.close()
    df.close()
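
The blank line written after each pct_high block leaves plot.data in the blank-line-separated grid layout that gnuplot's splot accepts, so the parameter sweep can be rendered as a surface directly.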
Example #3
    def _initialize(self, interactions):
        self._num_items = interactions.num_items
        self._num_users = interactions.num_users

        self.test_sequence = interactions.test_sequences

        self._net = Model2(self._num_users, self._num_items,
                           self.model_args).to(self._device)

        self._optimizer = optim.Adam(self._net.parameters(),
                                     weight_decay=self._l2,
                                     lr=self._learning_rate)
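
A note on the optimizer: weight_decay in torch.optim.Adam applies a standard L2 penalty during the update, which is presumably why this trainer exposes the coefficient as self._l2.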
Example #4
# Imports assumed for this snippet; load_SPECT_data, plot_history, Model and
# Model2 are project-local helpers.
import time

import numpy as np


def main(args):
    start = time.time()

    # seed number generator for experiment reproducibility
    np.random.seed(args.seed)

    X_train, y_train = load_SPECT_data(args.train_path)
    X_test, y_test = load_SPECT_data(args.test_path)

    # create a balanced validation set from the last 30 rows of the test set
    X_val = X_test[-30:, :]
    y_val = y_test[-30:]

    # instantiate a 1- or 2-hidden-layer model
    if args.num_layers == 1:
        dimensions = [X_train.shape[1], args.num_neurons1, 2]
        model = Model(dimensions, args.lr, activation=args.activation)
    elif args.num_layers == 2:
        dimensions = [X_train.shape[1], args.num_neurons1, args.num_neurons2, 2]
        model = Model2(dimensions, args.lr, activation=args.activation)
    else:
        raise ValueError('num_layers must be 1 or 2')

    # train model
    history = model.fit(X_train, y_train, X_val, y_val, args.num_epochs, args.batch_size, args.patience)

    # determine accuracies for data sets
    train_acc, train_metrics = model.evaluate(X_train, y_train)
    val_acc, val_metrics = model.evaluate(X_val, y_val)
    test_acc, test_metrics = model.evaluate(X_test, y_test)
    print(f'Training accuracy:   {train_acc:.4f}, precision: {train_metrics["precision"]:.4f},',
          f'recall: {train_metrics["recall"]:.4f}, F1: {train_metrics["f1"]:.4f}')
    print(f'Validation accuracy: {val_acc:.4f}, precision: {val_metrics["precision"]:.4f},',
          f'recall: {val_metrics["recall"]:.4f}, F1: {val_metrics["f1"]:.4f}')
    print(f'Test accuracy:       {test_acc:.4f}, precision: {test_metrics["precision"]:.4f},',
          f'recall: {test_metrics["recall"]:.4f}, F1: {test_metrics["f1"]:.4f}')

    plot_history(history)

    print(f'Script completed in {time.time()-start:.2f} secs')

    return 0
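
For context, a plausible sketch of the load_SPECT_data helper, assuming the UCI SPECT Heart CSV layout (binary class label in the first column, 22 binary features after it); the project's real loader may differ:

def load_SPECT_data(path):
    # Assumed layout: label, f1, ..., f22 per row, comma-separated.
    data = np.loadtxt(path, delimiter=',', dtype=int)
    return data[:, 1:], data[:, 0]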
Example #5
    # Assumed module-level imports for this snippet: os and
    # multiprocessing's Pool/Queue; Model2 and distribute are project-local.
    def __init__(self,
                 pop_size=64,
                 n_process=None,
                 n_eval=1,
                 p_keep=0.5,
                 n_candidate_eval=8,
                 n_candidates=3,
                 no_rew_early_stop=20,
                 rnn_size=256,
                 controller_size=128,
                 output_size=3):

        self.pop_size = pop_size
        self.n_process = n_process
        self.n_candidate_eval = n_candidate_eval
        self.n_candidates = n_candidates

        self.no_rew_early_stop = no_rew_early_stop
        if n_process is None:
            self.n_process = os.cpu_count()

        self.n_eval = n_eval
        self.p_keep = p_keep
        self.rnn_size = rnn_size
        self.controller_size = controller_size
        self.output_size = output_size

        self.elite = Model2(no_rew_early_stop,
                            rnn_size=rnn_size,
                            controller_size=controller_size)

        self.gen = 0

        self.results_queue = Queue()
        self.training_queue = Queue()
        self.pool = Pool(self.n_process, distribute,
                         (self.training_queue, self.results_queue, True))
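
Presumably distribute is the worker loop: each pooled process pulls candidate parameters from training_queue, evaluates them, and reports scores back through results_queue; passing the queues as Pool initializer arguments lets fork-started workers inherit them.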
Example #6
def main():
    FROM = datetime.utcnow().replace(hour=0, minute=0, second=0,
                                     microsecond=0).timestamp()
    TO = datetime.utcnow().timestamp()
    PAIR = 'BTCEUR'

    db = psycopg2.connect('dbname=johnny5 user=johnny5')
    cur = db.cursor()

    opts, args = getopt(sys.argv[1:], 'f:lp:t:')

    for o, a in opts:
        if o == '-f':
            FROM = date2ts(a)
        elif o == '-p':
            PAIR = a
        elif o == '-t':
            TO = date2ts(a)
        elif o == '-l':
            cur.execute('SELECT MIN(ts) FROM historical')
            FROM = cur.fetchone()[0]
        else:
            raise Exception("Unknown option: %s" % o)

    cur.execute('SELECT id, kname FROM pairs WHERE name=%s', (PAIR, ))
    pair_id, KPAIR = cur.fetchone()

    f = HistoricalFeed(db, pair_id, FROM, 600)
    m = Model2(db, pair_id, 6 * 3600, 600 * 2)

    while True:
        try:
            ts, value = f.next()
        except Feed.NoMore:
            break
        if ts >= TO:
            break
        action = m.newpoint(ts, value)
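
This is the single-configuration counterpart of Example #2: one HistoricalFeed/Model2 pair is replayed over the FROM-to-TO window, with no pct_low/pct_high sweep and no plot.data output.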
Example #7
from DataParser import DataParser
from model2 import Model2 as Model


maxParagraphLength = 250
maxParagraphs = 10
labels = 1000
vocabularySize = 15000
model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)
training = DataParser(maxParagraphLength, maxParagraphs, labels,
                      vocabularySize)
training.getDataFromfile("data/vocab_3L_l1000_sampled_10000_red_train.txt")

batchSize = 50

epoch = 0
epochEnd = 10
for e in range(epoch, epochEnd):
    print('Epoch: ' + str(e))
    cost = 0
    for itr in range(training.totalPages // batchSize):
        cost += model.train(training.nextBatch(batchSize))
    print('Cost: ' + str(cost))

    # Checkpoint every tenth epoch; with epochEnd = 10 only epoch 0 is saved.
    if e % 10 == 0:
        model.save("model2_l1000_" + str(e))
Example #8
# Assumes import tensorflow as tf (TF1) plus the X/Y/keep_prob placeholder
# trio for each model defined earlier; logitse1/logitse2 are unused in this
# fragment.
logitse1 = tf.placeholder(tf.float32, shape=[None, 10])
logitse2 = tf.placeholder(tf.float32, shape=[None, 10])

# Make model 1
model1 = Model1(X, Y, keep_prob)
logits1, predictions1 = model1.build()
loss_op1 = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=model1.Y))
train_op1 = tf.train.AdamOptimizer(
    learning_rate=model1.learning_rate).minimize(loss_op1)
accuracy1 = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(predictions1, 1), tf.argmax(model1.Y, 1)),
            tf.float32))

# Make model 2
model2 = Model2(X2, Y2, keep_prob2)
logits2, predictions2 = model2.build()
loss_op2 = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=model2.Y2))
train_op2 = tf.train.AdamOptimizer(
    learning_rate=model2.learning_rate).minimize(loss_op2)
accuracy2 = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(predictions2, 1), tf.argmax(model2.Y2, 1)),
            tf.float32))

# Make model 3
model3 = Model3(X3, Y3, keep_prob3)
logits3, predictions3 = model3.build()
loss_op3 = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits3, labels=model3.Y3))
train_op3 = tf.train.AdamOptimizer(
    learning_rate=model3.learning_rate).minimize(loss_op3)
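
A minimal TF1-style training step for the first model, assuming hypothetical batch_x/batch_y arrays and the placeholder trio from the snippet:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One optimization step plus an accuracy read-out for model 1.
    _, acc = sess.run([train_op1, accuracy1],
                      feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.5})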
Example #9
from model2 import Model2

factor = 0.2  # perturb each parameter by ±20%

# Note: params aliases Model2's class-level dict; each key is restored to its
# original value after its perturbation runs.
params = Model2.params

outputs = {}

for key in params.keys():
    original = params[key]
    outputs[key] = {}

    # Increase by factor
    params[key] = original * (1 + factor)
    model = Model2(params)
    outputs[key]['increased'] = model.solve()

    # Decrease by factor
    params[key] = original * (1 - factor)
    model = Model2(params)
    outputs[key]['decreased'] = model.solve()

    # Original value
    params[key] = original
    model = Model2(params)
    outputs[key]['original'] = model.solve()

    # Sensitivity range: spread between the increased and decreased outputs.
    outputs[key]['range'] = (outputs[key]['increased']
                             - outputs[key]['decreased'])
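
A small follow-on sketch, assuming model.solve() returns a scalar, ranking parameters by how far the ±20% perturbation moves the output:

for key, out in sorted(outputs.items(),
                       key=lambda kv: abs(kv[1]['range']), reverse=True):
    print('%s: range=%g (baseline %g)' % (key, out['range'], out['original']))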
Example #10
        model1.fit(*train1)
        model1.save_model(model1_path)
        train1_tags = list(chain(*train1[1]))
        train1_pred_tags = list(chain(*model1.predict(train1[0])))
        print(f'Model 1 Train Accuracy: {accuracy(train1_tags, train1_pred_tags)}')
    finally:
        test1 = preprocess(Path('data/test1.wtag'))
        test1_true_tags = list(chain(*test1[1]))
        test1_pred_tags = list(chain(*model1.predict(test1[0])))
        print(f'Model 1 Test Accuracy: {accuracy(test1_true_tags, test1_pred_tags)}')
        confusion = confusion_matrix(test1_true_tags, test1_pred_tags, model1.tag_vocabulary, n=10)
        tags = [tag for tag in model1.tag_vocabulary if (tag, tag) in confusion]
        tags.sort(key=lambda tag: confusion[tag, tag])
        print('Model 1 Test Confusion Matrix:')
        print(''.ljust(5) + ''.join(t.ljust(5) for t in tags))
        rows = [r.ljust(5) + ''.join([f'{confusion[r, c]:.2f}'.ljust(5) for c in tags]) for r in tags]
        print('\n'.join(rows))

    # Model 2
    model2_path = Path('model/model2')
    try:
        model2 = load_model(model2_path)
    except FileNotFoundError:
        train2 = preprocess(Path('data/train2.wtag'))
        model2 = Model2(lambda_=0.1, beam=3)
        model2.fit(*train2)
        model2.save_model(model2_path)
        train2_tags = list(chain(*train2[1]))
        train2_pred_tags = list(chain(*model2.predict(train2[0])))
        print(f'Model 2 Train Accuracy: {accuracy(train2_tags, train2_pred_tags)}')

    print(f'Total runtime: {time() - total:.2f}s')
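
In the confusion-matrix display above, sorting tags by their diagonal entries in ascending order places the worst-recognized tags first, so the heaviest confusions sit in the top-left of the printout.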
Example #11
# Imports assumed for this snippet; thresholdTuning is a project-local helper.
import os
import sys

import numpy as np
from sklearn.metrics import confusion_matrix, f1_score

from DataParser import DataParser
from model2 import Model2 as Model


def ComputeFscore(modelfile, testfile, outputfile):

    CURRENT_DIR = os.path.dirname(os.path.abspath("./WikiCategoryLabelling/"))
    sys.path.append(os.path.dirname(CURRENT_DIR + "/WikiCategoryLabelling/"))

    maxParagraphLength = 250
    maxParagraphs = 10
    labels = 1000
    vocabularySize = 150000
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch()
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    # Tune a per-label threshold on the first 35% of pages, then evaluate on
    # the remaining pages.
    valid = int(len(truePre) * 0.35)
    ConfusionMa = {}
    fScr = {}
    thresLab = {}
    for la in range(1000):
        if la % 25 == 0:
            print("Currnet label", la)
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)

        t = []
        p = []
        for i in range(valid, len(truePre)):
            t.append(truePre[i][la])
            p.append(pred[i][la])

        p = np.array(p)
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p >= bestThre)
        thresLab[la] = bestThre

    with open(outputfile, "w") as f:
        for i in range(1000):
            f.write(str(i) + "," + str(thresLab[i]) + "," + str(fScr[i]) + "\n")
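
A minimal invocation sketch; the paths below are hypothetical placeholders:

# Hypothetical paths; writes one "label,threshold,F1" line per label.
ComputeFscore("model2_l1000_9", "data/vocab_test.txt", "fscore_per_label.csv")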