Example #1
def test_export():
    """Assert that TPOT's export function throws a ValueError when no optimized pipeline exists"""
    tpot_obj = TPOT()

    try:
        tpot_obj.export("test_export.py")
        assert False  # Should be unreachable
    except ValueError:
        pass
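The try/except/assert False pattern above is a common way to check that an exception is raised. As a hedged alternative sketch (assuming pytest is available, which the original does not imply; the test name test_export_unfitted is illustrative), the same check reads more directly with pytest.raises:

import pytest
from tpot import TPOT

def test_export_unfitted():
    """Same assertion as above: export() raises ValueError before a pipeline has been optimized."""
    tpot_obj = TPOT()
    with pytest.raises(ValueError):
        tpot_obj.export("test_export.py")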
Example #3
def test_export():
    """Ensure that the TPOT export function raises a ValueError when no optimized pipeline exists"""

    tpot_obj = TPOT()

    try:
        tpot_obj.export('will_not_output')
        assert False  # Should be unreachable
    except ValueError:
        pass
from tpot import TPOT
from sklearn.cross_validation import train_test_split
import pandas as pd 
import numpy as np

telescope = pd.read_csv("MAGIC Gamma Telescope Data.csv")

#clean the data
telescope_shuffle = telescope.iloc[np.random.permutation(len(telescope))]
tele = telescope_shuffle.reset_index(drop=True)

#Store classes
tele['Class'] = tele['Class'].map({'g':0, 'h':1})
tele_class = tele['Class'].values

#Split data
training_indices, validation_indices = train_test_split(tele.index,
                                                         stratify=tele_class,
                                                         train_size=0.75,
                                                         test_size=0.25)

#find best model
tpot = TPOT(generations=5, verbosity=2)
tpot.fit(tele.drop('Class', axis=1).loc[training_indices].values,
	tele.loc[training_indices, "Class"].values)

#Score the accuracy
tpot.score(tele.drop('Class', axis=1).loc[validation_indices].values,
	tele.loc[validation_indices, 'Class'].values)

#Export generated code
tpot.export('pipeline.py')
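These listings target an early TPOT release, where the estimator class is still called TPOT and train_test_split lives in sklearn.cross_validation (removed in scikit-learn 0.20). As a rough, hedged sketch of the same telescope workflow with a current TPOT and scikit-learn (using the TPOTClassifier API; parameter values carried over from the example above):

from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# Load and shuffle the data, then encode the class labels as 0/1
telescope = pd.read_csv("MAGIC Gamma Telescope Data.csv")
tele = telescope.iloc[np.random.permutation(len(telescope))].reset_index(drop=True)
tele['Class'] = tele['Class'].map({'g': 0, 'h': 1})

# Stratified 75/25 split on the feature matrix and labels directly
X = tele.drop('Class', axis=1).values
y = tele['Class'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
                                                    train_size=0.75, test_size=0.25)

tpot = TPOTClassifier(generations=5, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
tpot.export('pipeline.py')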
from tpot import TPOT
from sklearn.cross_validation import train_test_split
import pandas as pd

# load_sen_gene, clustered_genes and load_bladder_610K come from earlier in the original script
train_test_data = pd.DataFrame.to_dict(load_sen_gene, 'series')
snps = train_test_data['#snp']
genes = train_test_data['gene']

# Build a binary SNP-by-gene membership matrix (a pandas alternative is sketched at the end of this example)
d_array = {}
for snp in snps:
    d_array[snp] = {}
    for i in range(len(clustered_genes)):
        if snp in (clustered_genes[i])['#snp'].values:
            d_array[snp][(clustered_genes[i])['gene'].values[0]] = 1.0
        else:
            d_array[snp][(clustered_genes[i])['gene'].values[0]] = 0.0

df = pd.DataFrame(d_array).T

phenotype = load_bladder_610K['phenotype']
individuals = load_bladder_610K.drop('phenotype', axis=1)

X_train, X_test, y_train, y_test = train_test_split(individuals, phenotype, 
                                                    train_size=0.75, 
                                                    test_size=0.25, 
                                                    random_state=42)

tpot = TPOT(generations=10, population_size=10, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
tpot.export('tpot_sen_gene_pipeline_b610k.py')
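The nested loop above fills a binary SNP-by-gene membership matrix one cell at a time. As a hedged sketch, assuming each entry of clustered_genes is a DataFrame with '#snp' and 'gene' columns (as the loop implies), the same matrix can be built with set-based membership tests and a single DataFrame constructor:

import pandas as pd

# One representative gene per cluster, mapped to the set of SNPs in that cluster
cluster_members = {cg['gene'].values[0]: set(cg['#snp'].values) for cg in clustered_genes}

# Rows are SNPs, columns are genes, values are 1.0/0.0 membership flags (same layout as df above)
df = pd.DataFrame(
    {gene: [1.0 if snp in members else 0.0 for snp in snps]
     for gene, members in cluster_members.items()},
    index=snps,
)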


Example #7
from tpot import TPOT
from sklearn.datasets import load_iris
from sklearn.cross_validation import train_test_split
iris = load_iris()

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    train_size=0.75, test_size=0.25)

tpot = TPOT(generations=5, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))

tpot.export('tpot_iris_pipeline.py')
"""
Testing TPOT [Tree-based Pipeline Optimization Tool] built by Randy Olson
(http://www.randalolson.com/2015/11/15/introducing-tpot-the-data-science-assistant/)
"""

from tpot import TPOT
import sys
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.cross_validation import train_test_split
  

# Scan the command line for a "-df" flag giving the data file path
# (an argparse alternative is sketched after this example)
for i in range(1, len(sys.argv), 2):
    if sys.argv[i] == "-df":
        DF = sys.argv[i + 1]


df = np.loadtxt(DF, skiprows=1, usecols=range(1,271))

#df = pd.read_csv(DF, sep='\t',header=0, index_col=0)
print(df.shape)  # df is a plain NumPy array here, so DataFrame.info() would not work
y = df[:,0]
x = df[:,1:]
 
X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=0.75)  
  
tpot = TPOT(generations=5, verbosity=2)  
tpot.fit(X_train, y_train)  
print(tpot.score(X_train, y_train, X_test, y_test))
tpot.export('tpot_NNU_k3_pro_p05_pipeline.py')
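The manual sys.argv scan at the top of this script only recognises a -df flag. As a small sketch (not part of the original; it keeps the -df flag name), the standard-library argparse module expresses the same thing and reports a missing argument automatically:

import argparse

parser = argparse.ArgumentParser(description="Run TPOT on a delimited data file")
parser.add_argument("-df", dest="df", required=True, help="path to the input data file")
args = parser.parse_args()
DF = args.df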

import numpy
from tpot import TPOT
from sklearn.cross_validation import train_test_split

# 'data' is assembled earlier in the original notebook
X = numpy.array(data['X'])
Y = numpy.array(data['Y'])
X_train, X_test, y_train, y_test = train_test_split(X, Y,
                                                    train_size=0.75, test_size=0.25)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

tpot = TPOT(generations=1, verbosity=2)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))

tpot.export('sentiment_pipeline2.py')


"""
with open("/media/Data2/workspace/projects/kalamacom/sentiment_model_selection.py") as f:
    code = compile(f.read(), "/media/Data2/workspace/projects/kalamacom/sentiment_model_selection.py", 'exec')
    exec(code)

"""
import time
from tpot import TPOT
from sklearn.cross_validation import train_test_split

# 'train' (the Expedia training DataFrame) and 'drop_list' are prepared earlier in the original script
train = train.drop(drop_list, axis=1)
train = train[0:3000000:300]  # keep every 300th row of the first 3,000,000
train.info(memory_usage='deep')



X = train.drop("hotel_cluster",axis=1).values
y = train.loc[: , "hotel_cluster"].values

del train
import gc
gc.collect()

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, test_size=0.25)

print("got here!")

my_tpot = TPOT(generations=20, verbosity=2, population_size=5)  # seems to have a problem with population_size < 5
# note: generations=1 really means two generations!

start = time.clock()
print(start)
my_tpot.fit(X_train, y_train)
my_tpot.export('tpot_expedia_pipeline.py')
end = time.clock()
duration = end - start
score = my_tpot.score(X_test, y_test)
print(duration, score)
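One caveat on the timing above: time.clock() was deprecated in Python 3.3 and removed in Python 3.8, so this script fails on a current interpreter. A minimal sketch of the same measurement with time.perf_counter():

import time

start = time.perf_counter()  # high-resolution timer; time.clock() no longer exists in 3.8+
my_tpot.fit(X_train, y_train)
my_tpot.export('tpot_expedia_pipeline.py')
duration = time.perf_counter() - start
score = my_tpot.score(X_test, y_test)
print(duration, score)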