Example #1
0
    'stingray': 'Hexatrygonidae',
    'wolf': 'Canis lupus',
    'vampire': 'Desmodus rotundus',
    'wasp': 'Hymenoptera',
    'tuna': 'Thunnini',
    'tortoise': 'Testudinidae',
    'termite': 'Termitidae',
    'vole': 'Arvicolinae',
    'tuatara': 'Hatteria punctata',
    'wren': 'Troglodytidae',
}

def convert(a):
    """Return the CONVERSION entry for ``a``, or ``a`` itself when there is none."""
    try:
        return CONVERSION[a]
    except KeyError:
        # No override registered for this name: hand it back untouched.
        return a

# Build `ids`, one entry per label in `y`, holding the 'ot:ottId' of the first
# match returned by tax.TNRS for that label.  The list is cached in ./ids.pkl
# so the lookups only run when no cache file exists.
X, y = load('zoo')
if os.path.exists('./ids.pkl'):
    # NOTE: pickle.load executes arbitrary code from the file; only use a
    # cache file that this script produced.
    with open('./ids.pkl', 'rb') as fp:
        ids = pickle.load(fp)
else:
    tax = api.taxomachine
    # '+' in a label is replaced by a space, then convert() substitutes the
    # name registered in CONVERSION (if any) before the lookup.
    animals = [convert(a.replace('+', ' ')) for a in y]
    ids = []
    for animal in tqdm(animals):
        result = tax.TNRS([animal])['results']
        matches = result[0]['matches'] if result else []
        if not matches:
            # Previously the failure was printed and the code then indexed the
            # empty list anyway, dying with an IndexError.  Record a None
            # placeholder instead so `ids` stays positionally aligned with `y`.
            print("Failed: %s" % animal)
            ids.append(None)
            continue
        ids.append(matches[0]['ot:ottId'])
    with open('./ids.pkl', 'wb') as fp:
Example #2
0
import seaborn as sns
sns.set_style("white")
import logging
logging.basicConfig(level=logging.INFO)
import cPickle as pickle
import numpy as np
from sklearn.decomposition import PCA
from path import Path
from tqdm import tqdm

from trees.data import load
from trees.interact import Database, Interactor
from trees.ddt import GaussianLikelihoodModel, DirichletDiffusionTree, Inverse
from trees.mcmc import MetropolisHastingsSampler

# Load the 'mnist' dataset and build the Database / Interactor objects for it.
mnist = load('mnist')
database = Database('mnist')
interactor = Interactor(mnist, database)

X, y = mnist.X, mnist.y

# Convert to float32 and divide by 255
# (presumably raw values are 0-255 pixel intensities -- TODO confirm).
X = X.astype(np.float32)
X /= 255.0

# Subtract the mean taken along axis 0.
X -= X.mean(axis=0)

# NOTE(review): logging.basicConfig is called with level=logging.INFO earlier
# in this script, so this DEBUG message is not emitted -- confirm intended.
logging.debug("Finding PCA...")
pca_path = Path("pca.pkl")
# Reuse a previously pickled `pca` object when pca.pkl exists.
if pca_path.exists():
    with open(pca_path, 'rb') as fp:
        pca = pickle.load(fp)
Example #3
0
from cStringIO import StringIO
import numpy as np
import logging
# logging.basicConfig(level=logging.DEBUG)
import matplotlib.pyplot as plt
import seaborn as sns
from trees.dasgupta import DasguptaTree
from trees.mcmc import SPRSampler
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from itertools import combinations

from trees.data import load
from sklearn.decomposition import PCA

# Load the 'zoo' dataset.
data = load('zoo')
X, y = data.X, data.y

# Replace X by its PCA(10) transform, then add Gaussian noise scaled by 0.01.
pca = PCA(10)
X = pca.fit_transform(X)
# NOTE(review): this draw happens before np.random.seed(0) below, so the noise
# is not covered by the seed -- confirm that is intended.
X += np.random.normal(size=X.shape) * 0.01

N = X.shape[0]
np.random.seed(0)
# idx = np.random.permutation(np.arange(N))[:20]

# X = X[idx]
# y = np.array(y)
# y = y[idx]

# D is the element-wise reciprocal of the square pairwise-distance matrix.
# NOTE(review): squareform(pdist(X)) has zeros on its diagonal, so the diagonal
# of D comes from a division by zero -- confirm downstream code ignores it.
D = 1.0 / squareform(pdist(X))
Example #4
0
        pickle.dump(costs, fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp:
        # previous_trees = pickle.load(fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp:
        # pickle.dump(previous_trees + [t.get_state() for t in trees], fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler

if __name__ == "__main__":
    # Script entry point: parse arguments, create the output directory, load
    # the requested dataset and apply the per-dataset preprocessing.
    args = parse_args()

    out_dir = Path(args.out_dir) / args.data
    # out_dir is nested below args.out_dir.  mkdir_p() only creates the final
    # component and fails when the parent is missing; makedirs_p() creates the
    # whole chain and is still a no-op when the directory already exists.
    out_dir.makedirs_p()
    dataset_name = args.data
    dataset = load(dataset_name)
    X, y = dataset.X, dataset.y

    if dataset_name in ('mnist', 'iris', '20news'):
        # Seed before drawing the permutation, then keep the first
        # args.subset shuffled row indices.
        np.random.seed(0)
        idx = np.random.permutation(xrange(X.shape[0]))[:args.subset]
        X = X[idx]
        y = y[idx]
    if dataset_name in ('mnist', '20news'):
        # Replace X by its PCA(10) transform.
        pca = PCA(10)
        X = pca.fit_transform(X)
    if dataset_name == 'zoo':
        # pca = PCA(5)
        # X = pca.fit_transform(X)
        # Add Gaussian noise scaled by 0.01 to every entry of X (in place).
        X += np.random.normal(size=X.shape) * 0.01
Example #5
0
import numpy as np
# logging.basicConfig(level=logging.DEBUG)
import matplotlib.pyplot as plt
import seaborn as sns
from trees.mcmc import MetropolisHastingsSampler
from trees.ddt import *
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from itertools import combinations
import cPickle as pickle
import random

from trees.data import load
from sklearn.decomposition import PCA

# Load the 'zoo' dataset.
data = load('zoo')
X, y = data.X, data.y

# Replace X by its PCA(10) transform, then add Gaussian noise scaled by 0.01.
pca = PCA(10)
X = pca.fit_transform(X)
# NOTE(review): this draw happens before np.random.seed(0) below, so the noise
# is not covered by the seed -- confirm that is intended.
X += np.random.normal(size=X.shape) * 0.01

N = X.shape[0]
np.random.seed(0)
# idx = np.random.permutation(np.arange(N))[:20]

# X = X[idx]
# y = np.array(y)
# y = y[idx]

# Rebind N together with D from the two dimensions of X.
N, D = X.shape
Example #6
0
    'vampire': 'Desmodus rotundus',
    'wasp': 'Hymenoptera',
    'tuna': 'Thunnini',
    'tortoise': 'Testudinidae',
    'termite': 'Termitidae',
    'vole': 'Arvicolinae',
    'tuatara': 'Hatteria punctata',
    'wren': 'Troglodytidae',
}


def convert(a):
    """Look ``a`` up in CONVERSION; names without an entry are returned as-is."""
    return CONVERSION[a] if a in CONVERSION else a


# Build `ids`, one entry per label in `y`, holding the 'ot:ottId' of the first
# match returned by tax.TNRS for that label.  The list is cached in ./ids.pkl
# so the lookups only run when no cache file exists.
X, y = load('zoo')
if os.path.exists('./ids.pkl'):
    # NOTE: pickle.load executes arbitrary code from the file; only use a
    # cache file that this script produced.
    with open('./ids.pkl', 'rb') as fp:
        ids = pickle.load(fp)
else:
    tax = api.taxomachine
    # '+' in a label is replaced by a space, then convert() substitutes the
    # name registered in CONVERSION (if any) before the lookup.
    animals = [convert(a.replace('+', ' ')) for a in y]
    ids = []
    for animal in tqdm(animals):
        result = tax.TNRS([animal])['results']
        matches = result[0]['matches'] if result else []
        if not matches:
            # Previously the failure was printed and the code then indexed the
            # empty list anyway, dying with an IndexError.  Record a None
            # placeholder instead so `ids` stays positionally aligned with `y`.
            print("Failed: %s" % animal)
            ids.append(None)
            continue
        ids.append(matches[0]['ot:ottId'])
    with open('./ids.pkl', 'wb') as fp:
Example #7
0
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp:
    # previous_trees = pickle.load(fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp:
    # pickle.dump(previous_trees + [t.get_state() for t in trees], fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler


if __name__ == "__main__":
    # Script entry point: parse arguments, create the output directory, load
    # the requested dataset and apply the per-dataset preprocessing.
    args = parse_args()

    out_dir = Path(args.out_dir) / args.data
    # out_dir is nested below args.out_dir.  mkdir_p() only creates the final
    # component and fails when the parent is missing; makedirs_p() creates the
    # whole chain and is still a no-op when the directory already exists.
    out_dir.makedirs_p()
    dataset_name = args.data
    dataset = load(dataset_name)
    X, y = dataset.X, dataset.y

    if dataset_name in ('mnist', 'iris', '20news'):
        # Seed before drawing the permutation, then keep the first
        # args.subset shuffled row indices.
        np.random.seed(0)
        idx = np.random.permutation(xrange(X.shape[0]))[:args.subset]
        X = X[idx]
        y = y[idx]
    if dataset_name in ('mnist', '20news'):
        # Replace X by its PCA(10) transform.
        pca = PCA(10)
        X = pca.fit_transform(X)
    if dataset_name == 'zoo':
        # pca = PCA(5)
        # X = pca.fit_transform(X)
        # Add Gaussian noise scaled by 0.01 to every entry of X (in place).
        X += np.random.normal(size=X.shape) * 0.01