예제 #1
0
    'stingray': 'Hexatrygonidae',
    'wolf': 'Canis lupus',
    'vampire': 'Desmodus rotundus',
    'wasp': 'Hymenoptera',
    'tuna': 'Thunnini',
    'tortoise': 'Testudinidae',
    'termite': 'Termitidae',
    'vole': 'Arvicolinae',
    'tuatara': 'Hatteria punctata',
    'wren': 'Troglodytidae',
}

def convert(a):
    """Return the CONVERSION entry for ``a``.

    Names that have no entry in CONVERSION are returned unchanged.
    """
    try:
        return CONVERSION[a]
    except KeyError:
        return a

# Load the zoo dataset; the entries of `y` are the names resolved below.
X, y = load('zoo')
# Reuse the ids pickled in ./ids.pkl when that file exists; otherwise look
# every name up through the taxonomy API.
if os.path.exists('./ids.pkl'):
    with open('./ids.pkl', 'rb') as fp:
        ids = pickle.load(fp)
else:
    tax = api.taxomachine
    # Replace '+' with a space in each name, then pass it through convert().
    animals = [convert(a.replace('+', ' ')) for a in y]
    ids = []
    for animal in tqdm(animals):
        # One TNRS request per name -- presumably Open Tree of Life's
        # taxonomic name resolution service; TODO confirm.
        result = tax.TNRS([animal])['results']
        if len(result) == 0:
            print "Failed:", animal
        # NOTE(review): execution falls through after the "Failed:" message,
        # so an empty result list raises IndexError on the next line.
        result = result[0]
        # Only the first match of the first result is kept.
        match = result['matches'][0]
        ids.append(match['ot:ottId'])
    with open('./ids.pkl', 'wb') as fp:
예제 #2
0
import seaborn as sns
sns.set_style("white")
import logging
logging.basicConfig(level=logging.INFO)
import cPickle as pickle
import numpy as np
from sklearn.decomposition import PCA
from path import Path
from tqdm import tqdm

from trees.data import load
from trees.interact import Database, Interactor
from trees.ddt import GaussianLikelihoodModel, DirichletDiffusionTree, Inverse
from trees.mcmc import MetropolisHastingsSampler

# Load MNIST and wire up the interaction database for it.
mnist = load('mnist')
database = Database('mnist')
interactor = Interactor(mnist, database)

X = mnist.X
y = mnist.y

# Convert to float32 and divide by 255.0, then subtract the per-column mean.
X = X.astype(np.float32) / 255.0
X = X - X.mean(axis=0)

logging.debug("Finding PCA...")
pca_path = Path("pca.pkl")
# Load a previously pickled PCA object when pca.pkl is present.
if pca_path.exists():
    with open(pca_path, 'rb') as fp:
        pca = pickle.load(fp)
예제 #3
0
파일: dasgupta.py 프로젝트: sharadmv/trees
from cStringIO import StringIO
import numpy as np
import logging
# logging.basicConfig(level=logging.DEBUG)
import matplotlib.pyplot as plt
import seaborn as sns
from trees.dasgupta import DasguptaTree
from trees.mcmc import SPRSampler
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from itertools import combinations

from trees.data import load
from sklearn.decomposition import PCA

# Load the zoo dataset.
data = load('zoo')
X = data.X
y = data.y

# Project onto 10 principal components, then add Gaussian noise scaled by 0.01.
pca = PCA(10)
X = pca.fit_transform(X)
X = X + 0.01 * np.random.normal(size=X.shape)

# NOTE(review): the seed is set after the noise above has been drawn, so the
# noise itself is not covered by it -- confirm that is intended.
np.random.seed(0)
N = len(X)
# idx = np.random.permutation(np.arange(N))[:20]

# X = X[idx]
# y = np.array(y)
# y = y[idx]

# Element-wise reciprocal of the square pairwise-distance matrix.
# NOTE(review): squareform() puts zeros on the diagonal, so D holds inf
# there -- confirm downstream code expects that.
D = 1.0 / squareform(pdist(X))
예제 #4
0
        pickle.dump(costs, fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp:
        # previous_trees = pickle.load(fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp:
        # pickle.dump(previous_trees + [t.get_state() for t in trees], fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler

if __name__ == "__main__":
    # Script entry point: parse the CLI options, load the requested dataset
    # and preprocess it according to the dataset name.
    args = parse_args()

    dataset_name = args.data
    # Output goes beneath <out_dir>/<dataset name>; create it if missing.
    out_dir = Path(args.out_dir) / dataset_name
    out_dir.mkdir_p()
    dataset = load(dataset_name)
    X, y = dataset.X, dataset.y

    if dataset_name in ('mnist', 'iris', '20news'):
        # Seed first so the same rows are selected on every run.
        np.random.seed(0)
        # An int argument permutes 0..n-1 directly, which avoids the
        # Python-2-only xrange.
        idx = np.random.permutation(X.shape[0])[:args.subset]
        X = X[idx]
        y = y[idx]
    if dataset_name in ('mnist', '20news'):
        # Reduce to 10 principal components.
        pca = PCA(10)
        X = pca.fit_transform(X)
    if dataset_name == 'zoo':
        # pca = PCA(5)
        # X = pca.fit_transform(X)
        # Add Gaussian noise scaled by 0.01 (drawn without a fixed seed).
        X += np.random.normal(size=X.shape) * 0.01
예제 #5
0
파일: var.py 프로젝트: islamazhar/trees
import numpy as np
# logging.basicConfig(level=logging.DEBUG)
import matplotlib.pyplot as plt
import seaborn as sns
from trees.mcmc import MetropolisHastingsSampler
from trees.ddt import *
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from itertools import combinations
import cPickle as pickle
import random

from trees.data import load
from sklearn.decomposition import PCA

# Load the zoo dataset.
data = load('zoo')
X = data.X
y = data.y

# Project onto 10 principal components, then add Gaussian noise scaled by 0.01.
pca = PCA(10)
X = pca.fit_transform(X)
X = X + 0.01 * np.random.normal(size=X.shape)

# NOTE(review): the seed is set after the noise above has been drawn, so the
# noise itself is not covered by it -- confirm that is intended.
np.random.seed(0)
# idx = np.random.permutation(np.arange(N))[:20]

# X = X[idx]
# y = np.array(y)
# y = y[idx]

# Row and column counts of the projected matrix.
N, D = X.shape
예제 #6
0
    'vampire': 'Desmodus rotundus',
    'wasp': 'Hymenoptera',
    'tuna': 'Thunnini',
    'tortoise': 'Testudinidae',
    'termite': 'Termitidae',
    'vole': 'Arvicolinae',
    'tuatara': 'Hatteria punctata',
    'wren': 'Troglodytidae',
}


def convert(a):
    """Return the CONVERSION entry for ``a``.

    Names that have no entry in CONVERSION are returned unchanged.
    """
    try:
        return CONVERSION[a]
    except KeyError:
        return a


# Load the zoo dataset; the entries of `y` are the names resolved below.
X, y = load('zoo')
# Reuse the ids pickled in ./ids.pkl when that file exists; otherwise look
# every name up through the taxonomy API.
if os.path.exists('./ids.pkl'):
    with open('./ids.pkl', 'rb') as fp:
        ids = pickle.load(fp)
else:
    tax = api.taxomachine
    # Replace '+' with a space in each name, then pass it through convert().
    animals = [convert(a.replace('+', ' ')) for a in y]
    ids = []
    for animal in tqdm(animals):
        # One TNRS request per name -- presumably Open Tree of Life's
        # taxonomic name resolution service; TODO confirm.
        result = tax.TNRS([animal])['results']
        if len(result) == 0:
            print "Failed:", animal
        # NOTE(review): execution falls through after the "Failed:" message,
        # so an empty result list raises IndexError on the next line.
        result = result[0]
        # Only the first match of the first result is kept.
        match = result['matches'][0]
        ids.append(match['ot:ottId'])
    with open('./ids.pkl', 'wb') as fp:
예제 #7
0
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'r') as fp:
    # previous_trees = pickle.load(fp)
    # with open(out_dir / name / 'trees-%u.pkl' % index, 'w') as fp:
    # pickle.dump(previous_trees + [t.get_state() for t in trees], fp)
    with open(out_dir / name / 'final-tree-%u.pkl' % index, 'w') as fp:
        pickle.dump(sampler.tree.get_state(), fp)
    return costs, scores, sampler


if __name__ == "__main__":
    # Script entry point: parse the CLI options, load the requested dataset
    # and preprocess it according to the dataset name.
    args = parse_args()

    dataset_name = args.data
    # Output goes beneath <out_dir>/<dataset name>; create it if missing.
    out_dir = Path(args.out_dir) / dataset_name
    out_dir.mkdir_p()
    dataset = load(dataset_name)
    X, y = dataset.X, dataset.y

    if dataset_name in ('mnist', 'iris', '20news'):
        # Seed first so the same rows are selected on every run.
        np.random.seed(0)
        # An int argument permutes 0..n-1 directly, which avoids the
        # Python-2-only xrange.
        idx = np.random.permutation(X.shape[0])[:args.subset]
        X = X[idx]
        y = y[idx]
    if dataset_name in ('mnist', '20news'):
        # Reduce to 10 principal components.
        pca = PCA(10)
        X = pca.fit_transform(X)
    if dataset_name == 'zoo':
        # pca = PCA(5)
        # X = pca.fit_transform(X)
        # Add Gaussian noise scaled by 0.01 (drawn without a fixed seed).
        X += np.random.normal(size=X.shape) * 0.01