コード例 #1
0
ファイル: construct_tol.py プロジェクト: sharadmv/trees
    'stingray': 'Hexatrygonidae',
    'wolf': 'Canis lupus',
    'vampire': 'Desmodus rotundus',
    'wasp': 'Hymenoptera',
    'tuna': 'Thunnini',
    'tortoise': 'Testudinidae',
    'termite': 'Termitidae',
    'vole': 'Arvicolinae',
    'tuatara': 'Hatteria punctata',
    'wren': 'Troglodytidae',
}

def convert(a):
    """Return the CONVERSION entry for ``a``, or ``a`` unchanged if none exists."""
    try:
        return CONVERSION[a]
    except KeyError:
        # No override registered for this name: pass it through as-is.
        return a

# Load the 'zoo' dataset; the entries of y are the names resolved below.
X, y = load('zoo')
# Reuse the pickled id list when ./ids.pkl already exists.
if os.path.exists('./ids.pkl'):
    with open('./ids.pkl', 'rb') as fp:
        ids = pickle.load(fp)
else:
    tax = api.taxomachine
    # Replace '+' with a space in each name, then apply convert() so that
    # names listed in CONVERSION are swapped for their mapped value.
    animals = [convert(a.replace('+', ' ')) for a in y]
    ids = []
    for animal in tqdm(animals):
        # One TNRS query per name; only the 'results' entry is used.
        result = tax.TNRS([animal])['results']
        if len(result) == 0:
            print "Failed:", animal
        # NOTE(review): an empty result is only reported above; execution
        # still reaches result[0] below, which presumably raises IndexError
        # for an empty list -- confirm whether such names should be skipped.
        result = result[0]
        # Take the first match of the first result and record its
        # 'ot:ottId' value.
        match = result['matches'][0]
        ids.append(match['ot:ottId'])
    with open('./ids.pkl', 'wb') as fp:
コード例 #2
0
ファイル: offline_mnist.py プロジェクト: islamazhar/trees
import seaborn as sns
sns.set_style("white")
import logging
logging.basicConfig(level=logging.INFO)
import cPickle as pickle
import numpy as np
from sklearn.decomposition import PCA
from path import Path
from tqdm import tqdm

from trees.data import load
from trees.interact import Database, Interactor
from trees.ddt import GaussianLikelihoodModel, DirichletDiffusionTree, Inverse
from trees.mcmc import MetropolisHastingsSampler

# Load the 'mnist' dataset and build an Interactor from it and the 'mnist'
# Database.
mnist = load('mnist')
database = Database('mnist')
interactor = Interactor(mnist, database)

X, y = mnist.X, mnist.y

# Cast to float32 and divide by 255 (presumably rescaling 8-bit pixel values
# into [0, 1] -- TODO confirm the input range).
X = X.astype(np.float32)
X /= 255.0

# Subtract the mean taken along axis 0.
X -= X.mean(axis=0)

# NOTE(review): logging is configured with level=logging.INFO earlier in this
# snippet, so this DEBUG-level message is expected to be filtered out.
logging.debug("Finding PCA...")
pca_path = Path("pca.pkl")
# Load a previously pickled PCA object when pca.pkl exists.
if pca_path.exists():
    with open(pca_path, 'rb') as fp:
        pca = pickle.load(fp)
コード例 #3
0
ファイル: dasgupta.py プロジェクト: sharadmv/trees
from cStringIO import StringIO
import numpy as np
import logging
# logging.basicConfig(level=logging.DEBUG)
import matplotlib.pyplot as plt
import seaborn as sns
from trees.dasgupta import DasguptaTree
from trees.mcmc import SPRSampler
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from itertools import combinations

from trees.data import load
from sklearn.decomposition import PCA

# Load the 'zoo' dataset and unpack its X and y attributes.
data = load('zoo')
X, y = data.X, data.y

# Reduce X with a 10-component PCA, then add Gaussian noise scaled by 0.01.
pca = PCA(10)
X = pca.fit_transform(X)
# NOTE(review): this noise is drawn before np.random.seed(0) below, so the
# seed does not make it reproducible -- confirm this ordering is intended.
X += np.random.normal(size=X.shape) * 0.01

N = X.shape[0]
np.random.seed(0)
# Disabled: restrict the data to a random subset of 20 rows.
# idx = np.random.permutation(np.arange(N))[:20]

# X = X[idx]
# y = np.array(y)
# y = y[idx]

# Element-wise reciprocal of the square pairwise-distance matrix of X.
# NOTE(review): squareform() yields a zero diagonal, so the diagonal of D is a
# division by zero (inf plus a NumPy warning) -- confirm it is never read.
D = 1.0 / squareform(pdist(X))
コード例 #4
0
ファイル: experiment.py プロジェクト: sharadmv/trees
        pickle.dump(costs, fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp:
        # previous_trees = pickle.load(fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp:
        # pickle.dump(previous_trees + [t.get_state() for t in trees], fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler

if __name__ == "__main__":
    # Script entry point: parse the command line, load the chosen dataset,
    # then apply the per-dataset preprocessing steps below.
    args = parse_args()

    dataset_name = args.data
    # Output goes under <args.out_dir>/<args.data>; create it if needed.
    out_dir = Path(args.out_dir) / args.data
    out_dir.mkdir_p()

    dataset = load(dataset_name)
    X, y = dataset.X, dataset.y

    # Keep the first args.subset entries of a seeded row permutation.
    if dataset_name in ('mnist', 'iris', '20news'):
        np.random.seed(0)
        row_count = X.shape[0]
        idx = np.random.permutation(xrange(row_count))[:args.subset]
        X, y = X[idx], y[idx]
    # Reduce these datasets with a 10-component PCA.
    if dataset_name in ('mnist', '20news'):
        pca = PCA(10)
        X = pca.fit_transform(X)
    # Add Gaussian noise scaled by 0.01 to the zoo features.
    if dataset_name == 'zoo':
        # pca = PCA(5)
        # X = pca.fit_transform(X)
        X += 0.01 * np.random.normal(size=X.shape)
コード例 #5
0
ファイル: var.py プロジェクト: islamazhar/trees
import numpy as np
# logging.basicConfig(level=logging.DEBUG)
import matplotlib.pyplot as plt
import seaborn as sns
from trees.mcmc import MetropolisHastingsSampler
from trees.ddt import *
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from itertools import combinations
import cPickle as pickle
import random

from trees.data import load
from sklearn.decomposition import PCA

# Load the 'zoo' dataset and unpack its X and y attributes.
data = load('zoo')
X, y = data.X, data.y

# Reduce X with a 10-component PCA, then add Gaussian noise scaled by 0.01.
pca = PCA(10)
X = pca.fit_transform(X)
# NOTE(review): this noise is drawn before np.random.seed(0) below, so the
# seed does not make it reproducible -- confirm this ordering is intended.
X += np.random.normal(size=X.shape) * 0.01

N = X.shape[0]
np.random.seed(0)
# Disabled: restrict the data to a random subset of 20 rows.
# idx = np.random.permutation(np.arange(N))[:20]

# X = X[idx]
# y = np.array(y)
# y = y[idx]

# Unpack both dimensions of X; N is assigned again here (same value as
# above) together with D, the size of the second axis.
N, D = X.shape
コード例 #6
0
    'vampire': 'Desmodus rotundus',
    'wasp': 'Hymenoptera',
    'tuna': 'Thunnini',
    'tortoise': 'Testudinidae',
    'termite': 'Termitidae',
    'vole': 'Arvicolinae',
    'tuatara': 'Hatteria punctata',
    'wren': 'Troglodytidae',
}


def convert(a):
    """Return the CONVERSION entry for ``a``, or ``a`` unchanged if none exists."""
    try:
        return CONVERSION[a]
    except KeyError:
        # No override registered for this name: pass it through as-is.
        return a


# Load the 'zoo' dataset; the entries of y are the names resolved below.
X, y = load('zoo')
# Reuse the pickled id list when ./ids.pkl already exists.
if os.path.exists('./ids.pkl'):
    with open('./ids.pkl', 'rb') as fp:
        ids = pickle.load(fp)
else:
    tax = api.taxomachine
    # Replace '+' with a space in each name, then apply convert() so that
    # names listed in CONVERSION are swapped for their mapped value.
    animals = [convert(a.replace('+', ' ')) for a in y]
    ids = []
    for animal in tqdm(animals):
        # One TNRS query per name; only the 'results' entry is used.
        result = tax.TNRS([animal])['results']
        if len(result) == 0:
            print "Failed:", animal
        # NOTE(review): an empty result is only reported above; execution
        # still reaches result[0] below, which presumably raises IndexError
        # for an empty list -- confirm whether such names should be skipped.
        result = result[0]
        # Take the first match of the first result and record its
        # 'ot:ottId' value.
        match = result['matches'][0]
        ids.append(match['ot:ottId'])
    with open('./ids.pkl', 'wb') as fp:
コード例 #7
0
ファイル: experiment.py プロジェクト: islamazhar/trees
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp:
    # previous_trees = pickle.load(fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp:
    # pickle.dump(previous_trees + [t.get_state() for t in trees], fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler


if __name__ == "__main__":
    # Script entry point: parse the command line, load the chosen dataset,
    # then apply the per-dataset preprocessing steps below.
    args = parse_args()

    dataset_name = args.data
    # Output goes under <args.out_dir>/<args.data>; create it if needed.
    out_dir = Path(args.out_dir) / args.data
    out_dir.mkdir_p()

    dataset = load(dataset_name)
    X, y = dataset.X, dataset.y

    # Keep the first args.subset entries of a seeded row permutation.
    if dataset_name in ('mnist', 'iris', '20news'):
        np.random.seed(0)
        row_count = X.shape[0]
        idx = np.random.permutation(xrange(row_count))[:args.subset]
        X, y = X[idx], y[idx]
    # Reduce these datasets with a 10-component PCA.
    if dataset_name in ('mnist', '20news'):
        pca = PCA(10)
        X = pca.fit_transform(X)
    # Add Gaussian noise scaled by 0.01 to the zoo features.
    if dataset_name == 'zoo':
        # pca = PCA(5)
        # X = pca.fit_transform(X)
        X += 0.01 * np.random.normal(size=X.shape)