Example No. 1
def main():
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)
    outputlist = []
    filecount = 0
    explore_path = "/Users/praneet/Documents/Kaggle/Amazon/test"
    classifier = SKCompat(
        learn.Estimator(model_fn=train,
                        model_dir="/Users/praneet/Downloads/model"))
    for root, dirs, files in os.walk(explore_path):
        for file_name in files:
            if file_name.endswith(".jpg"):
                lst = []
                eval_data = []
                filecount += 1
                file_path = os.path.abspath(os.path.join(root, file_name))
                img = cv2.imread(file_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                fixed_size = (56, 56)
                img = cv2.resize(img, dsize=fixed_size)
                eval_data.append(img)
                eval_data = np.array(eval_data, dtype=np.float32) / 255.
                predictions = classifier.predict(x=eval_data)
                print(file_name)
                lst.append(file_name)
                for x in predictions['probabilities']:
                    for y in x:
                        lst.append(y)
                outputlist.append(lst)
    print(filecount)
    df = pd.DataFrame(outputlist)
    df.to_csv('output.csv', index=False, header=False)
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)
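The `train` passed as model_fn above is the author's own function and is not shown on this page. As a reference for the contract learn.Estimator expects, here is a minimal hypothetical sketch, assuming the classic (features, targets, mode) signature and the 17 Amazon classes from Example No. 21; the body is illustrative, not the author's code:

import tensorflow as tf

def train(features, targets, mode):
    # Hypothetical head: flatten the 56x56x3 images into a single softmax layer
    flat = tf.contrib.layers.flatten(features)
    logits = tf.contrib.layers.fully_connected(flat, 17, activation_fn=None)
    predictions = {
        "classes": tf.argmax(logits, 1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }
    loss = None
    train_op = None
    if mode != tf.contrib.learn.ModeKeys.INFER:
        loss = tf.losses.sparse_softmax_cross_entropy(labels=targets,
                                                      logits=logits)
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer='Adam',
            learning_rate=0.001)
    return predictions, loss, train_op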
Example No. 2
    def _init_model(self):

        logging.info('Initializing LSTM model ...')

        self.regressor = SKCompat(
            learn.Estimator(model_fn=lstm_model(
                self.configs['time_steps'],
                self.configs['rnn_layers'],
                dense_layers=self.configs['dense_layers'],
                learning_rate=self.configs['learning_rate'])))

        logging.info('LSTM model is initialized.')
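Note that lstm_model(...) is called here rather than passed: throughout these examples it is a factory whose return value is the actual model_fn closure (Example No. 18 below shows the tail of one such factory). A minimal sketch of that pattern, with a hypothetical body that assumes the rnn_layers dicts carry 'num_units' as in the other snippets:

import tensorflow as tf
from tensorflow.contrib import learn as tflearn

def lstm_model(time_steps, rnn_layers, dense_layers=None, learning_rate=0.01):
    def _lstm_model(X, y):
        # Hypothetical body: stacked LSTM cells over the time axis and a
        # linear regression head (the real factories also run output[-1]
        # through dense layers; that step is omitted here)
        cells = [tf.contrib.rnn.BasicLSTMCell(layer['num_units'])
                 for layer in rnn_layers]
        x_seq = tf.unstack(X, num=time_steps, axis=1)
        output, _ = tf.contrib.rnn.static_rnn(
            tf.contrib.rnn.MultiRNNCell(cells), x_seq, dtype=tf.float32)
        prediction, loss = tflearn.models.linear_regression(output[-1], y)
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer='Adagrad',
            learning_rate=learning_rate)
        return prediction, loss, train_op
    return _lstm_model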
Example No. 3
def Q_update():
    """Update weights of three models:
       "sell" model, "buy" model and "hold" model
    """
    for action in gv.action_set:
        gv.mylogger.logger.info("Update " + action + " model")

        # # Configure a ValidationMonitor with training data
        # validation_monitor = learn.monitors.ValidationMonitor(
        #     np.float32(Q_data[action]),
        #     np.float32(Q_labels[action]),
        #     every_n_steps=20)

        # Create the estimator
        Q_estimator = learn.Estimator(model_fn=gv.cnn_model_fn,
                                      model_dir=gv.model_dirs[action])

        # Train the model
        SKCompat(Q_estimator).fit(x=train.Q_data[action].astype(np.float32),
                                  y=train.Q_labels[action].astype(np.float32),
                                  steps=training_steps)

        # Evaluate the model and print results
        eval_results = Q_estimator.evaluate(
            x=train.Q_data[action].astype(np.float32),
            y=train.Q_labels[action].astype(np.float32))
        gv.mylogger.logger.info(eval_results)
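The evaluate call above feeds x/y arrays to the raw learn.Estimator, the deprecated interface that SKCompat exists to wrap; the equivalent wrapped call, following the score pattern used in Example No. 7 below, would be:

        eval_results = SKCompat(Q_estimator).score(
            x=train.Q_data[action].astype(np.float32),
            y=train.Q_labels[action].astype(np.float32))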
Example No. 4
def predict_done():
    LOG_DIR = './ops_logs'
    TIMESTEPS = 10
    RNN_LAYERS = [{'num_units': TIMESTEPS}]
    DENSE_LAYERS = [10, 10]
    TRAINING_STEPS = 1000
    BATCH_SIZE = 100
    PRINT_STEPS = TRAINING_STEPS // 100  # every_n_steps below expects an int

    dateparse = lambda dates: pd.datetime.strptime(dates, '%d/%m/%Y %H:%M')
    rawdata = pd.read_csv(
        "./model/input/ElectricityPrice/RealMarketPriceDataPT.csv",
        parse_dates={'timeline': ['date', '(UTC)']},
        index_col='timeline',
        date_parser=dateparse)

    X, y = load_csvdata(rawdata, TIMESTEPS, seperate=False)

    regressor = SKCompat(
        learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS,
                                            DENSE_LAYERS)))

    validation_monitor = learn.monitors.ValidationMonitor(
        X['val'],
        y['val'],
        every_n_steps=PRINT_STEPS,
        early_stopping_rounds=1000)

    regressor.fit(X['train'],
                  y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    predicted = regressor.predict(X['test'])
    mae = mean_absolute_error(y['test'], predicted)

    plot_predicted, = plt.plot(predicted, label='predicted')
    plt.legend(handles=[plot_predicted])

    plt.show()
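To compare the fit against ground truth, the same plot-handle pattern extends to a second series; a small sketch, assuming y['test'] is aligned with the prediction:

    plot_predicted, = plt.plot(predicted, label='predicted')
    plot_test, = plt.plot(y['test'], label='test')
    plt.legend(handles=[plot_predicted, plot_test])
    plt.show()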
Example No. 5
def test():
    if not os.path.isfile("input.csv") and not os.path.isfile("output.csv"):
        gatherData()

    regressor = SKCompat(
        learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS)))

    X, y = load_csvdata(TIMESTEPS, seperate=False)


    print('-----------------------------------------')
    print('train y shape',y['train'].shape)
    print('train y sample', y['train'][1:5])
    print(y['val'].shape)
    y['val'] = y['val'].reshape(359,8)
    # create a lstm instance and validation monitor
    validation_monitor = learn.monitors.ValidationMonitor(X['val'], y['val'])
    y['train'] = y['train'].reshape(3239,8)

    regressor.fit(X['train'], y['train'],
                  monitors=[validation_monitor],
                  batch_size=BATCH_SIZE,
                  steps=TRAINING_STEPS)

    print('X train shape', X['train'].shape)
    print('y train shape', y['train'].shape)
    y['test'] = y['test'].reshape(399,8)
    print('X test shape', X['test'].shape)
    print('y test shape', y['test'].shape)
    predicted = np.asmatrix(regressor.predict(X['test']), dtype=np.float32)

    squared_error = np.asarray(predicted - y['test']) ** 2
    rmse = np.sqrt(squared_error.mean())
    print(rmse.shape)
    # The previous rmse code was incorrect: arrays, not matrices, were needed,
    # because rmse = np.sqrt(((predicted - y['test']) ** 2).mean()) does matrix
    # arithmetic on np.matrix operands.
    score = mean_squared_error(predicted, y['test'])
    nmse = score / np.var(y['test'])  # normalize by the variance of the original data, not the model's output

    print("RSME: %f" % rmse)
    print("NSME: %f" % nmse)
    print("MSE: %f" % score)
Example No. 6
def classify(tr_file, test_file):
    # Dataset file names; the datasets must live in your working directory

    if not os.path.exists(tr_file):
        print('no such train file')
    if not os.path.exists(test_file):
        print('no such test_file')
    train_data = tr_file
    test_data = test_file
    # Load the datasets: training set and test set
    training_set = tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=train_data, target_dtype=np.int, features_dtype=np.float32)
    test_set = tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=test_data, target_dtype=np.int, features_dtype=np.float32)

    # Features
    feature_columns = [
        tf.contrib.layers.real_valued_column("", dimension=2763)
    ]

    # Build the DNN: seven hidden layers with 4096, 8192, 4096, 2048, 512, 128 and 32 nodes
    classifier = SKCompat(
        tf.contrib.learn.DNNClassifier(
            feature_columns=feature_columns,
            hidden_units=[4096, 8192, 4096, 2048, 512, 128, 32],
            n_classes=13,
            model_dir=
            r'C:\Users\XUEJW\Desktop\兴业数据\分类用数据集\数据集\input dataset\model'))

    # Fit the model for 2000 steps

    classifier.fit(x=training_set.data, y=training_set.target, steps=2000)

    # Compute accuracy (SKCompat exposes score rather than evaluate)
    accuracy_score = classifier.score(x=test_set.data,
                                      y=test_set.target)["accuracy"]

    print('Accuracy: {0:f}'.format(accuracy_score))
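load_csv_without_header treats the last column as the target by default, so with dimension=2763 each row is expected to hold 2763 float features followed by one integer class label (0-12, matching n_classes=13). A sketch that writes such a file with hypothetical random data:

import numpy as np

# Hypothetical data: 100 rows of 2763 float features plus an integer label
features = np.random.rand(100, 2763)
labels = np.random.randint(0, 13, size=100)
with open("train.csv", "w") as f:
    for row, label in zip(features, labels):
        f.write(",".join("%.6f" % v for v in row) + ",%d\n" % label)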
Example No. 7
def main(_):
    mnist = learn.datasets.load_dataset('mnist')
    train_data = mnist.train.images
    train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
    eval_data = mnist.test.images
    eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

    mnist_classifier = learn.Estimator(
                config=RunConfig(gpu_memory_fraction=1./3),
                model_fn=lenet_model_fn,
                model_dir='/tmp/lenet_model')

    tensors_to_log = {} #{"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
            tensors=tensors_to_log,
            every_n_iter=50)

    SKCompat(mnist_classifier).fit(
            x=train_data,
            y=train_labels,
            batch_size=50,
            steps=100,
            monitors=[logging_hook])

    metrics = {
        "accuracy": learn.MetricSpec(
            metric_fn=tf.metrics.accuracy,
            prediction_key="classes"),
    }
    eval_results = SKCompat(mnist_classifier).score(
            x=eval_data,
            y=eval_labels,
            metrics=metrics)
    print(eval_results)

    mnist_classifier.save('/tmp/lenet_model')
Example No. 8
def Q_function(state, action):
    """Q-function
       Use trained models to predict
    """
    if path.exists(getcwd() + "/model"):
        # If model already exists, use the model to predict
        # Create the estimator
        Q_estimator = learn.Estimator(
            model_fn=cnn_model_fn,
            model_dir=model_dirs[action])

        # Predict using the estimator
        predictions = SKCompat(Q_estimator).predict(x=state.astype(np.float32))

        return predictions["results"][0][0]
    else:
        # If model doesn't exist, just return random value
        return uniform(-10000, 10000)
Example No. 9
def bag_of_words_model(features, target):
    """先转成词袋模型"""
    target = tf.one_hot(target, 15, 1, 0)
    features = encoders.bow_encoder(features,
                                    vocab_size=n_words,
                                    embed_dim=EMBEDDING_SIZE)
    logits = tf.contrib.layers.fully_connected(features,
                                               15,
                                               activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, target)
    train_op = tf.contrib.layers.optimize_loss(
        loss,
        tf.contrib.framework.get_global_step(),
        optimizer='Adam',
        learning_rate=0.01)
    return ({
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits)
    }, loss, train_op)


from tensorflow.contrib.learn.python import SKCompat

model_fn = bag_of_words_model
classifier = SKCompat(learn.Estimator(model_fn=model_fn))

# Train and predict
classifier.fit(x_train, y_train, steps=1000)
y_predicted = classifier.predict(x_test)['class']
score = metrics.accuracy_score(y_test, y_predicted)
print('Accuracy: {0:f}'.format(score))
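Because this model_fn returns both 'class' and 'prob', a single predict call can also feed probability-based metrics; a sketch reusing the snippet's names:

output = classifier.predict(x_test)
y_predicted = output['class']
y_prob = output['prob']
print('Accuracy: {0:f}'.format(metrics.accuracy_score(y_test, y_predicted)))
# labels=range(15) guards against classes missing from y_test
print('Log loss: {0:f}'.format(
    metrics.log_loss(y_test, y_prob, labels=list(range(15)))))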
Example No. 10
tf.logging.set_verbosity(tf.logging.INFO)

input.load_data()

train_data = np.load("train_data.npy")
#train_label_super = np.load("train_label_super.npy")

test_data = np.load("test_data.npy")
#test_label_super = np.load("test_label_super.npy")

train_label_sub = np.load("train_label_sub.npy")
test_label_sub = np.load("test_label_sub.npy")
print(train_data.shape)

# Create the Estimator
mnist_classifier = SKCompat(
    learn.Estimator(model_fn=train.cnn_model, model_dir="./model"))

epoch = 50

for i in range(epoch):

    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)

    # Train the model
    mnist_classifier.fit(x=train_data,
                         y=train_label_sub,
                         batch_size=128,
Example No. 11
# test data length
train_len = round(3 * data_len / 4)

x_train = data[0:train_len, :, :]
# start with just predicting the opening value the next day
y_train = data[1:train_len + 1, :, 1]

x_test = data[train_len + 1:-1, :, :]
# start with just predicting the opening value the next day
y_test = data[train_len + 2:, :, 1]

# let's start by just flattening the data
x_train = np.reshape(x_train, (train_len, -1))
x_test = np.reshape(x_test, (len(y_test), -1))

# Specify that all features have real-value data
feature_columns = [
    tf.contrib.layers.real_valued_column("", dimension=x_train.shape[1])
]

classifier = SKCompat(
    learn.DNNRegressor(label_dimension=y_train.shape[1],
                       feature_columns=feature_columns,
                       hidden_units=[100, 50, 20]))
classifier.fit(x_train, y_train, steps=100000, batch_size=100)
predictions = classifier.predict(x_test)['scores']
score = metrics.r2_score(y_test, predictions)
mse = metrics.mean_squared_error(y_test, predictions)
# score = np.linalg.norm(y_test - predictions) / np.linalg.norm(y_test)
print("R^2 score: %f, MSE: %f" % (score, mse))
Example No. 12
DENSE_LAYERS = None
TRAINING_STEPS = 100
PRINT_STEPS = TRAINING_STEPS / 10
BATCH_SIZE = 100
''' org
LOG_DIR = 'resources/logs/'
TIMESTEPS = 1
RNN_LAYERS = [{'num_units': 4}]
DENSE_LAYERS = None
TRAINING_STEPS = 100
PRINT_STEPS = TRAINING_STEPS / 10
BATCH_SIZE = 100
'''

regressor = SKCompat(
    learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                    model_dir=LOG_DIR))  # new
# regressor = learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),model_dir=LOG_DIR) # old

X, y = generate_data(np.sin,
                     np.arange(600, dtype=np.int32),
                     TIMESTEPS,
                     seperate=False)
print(X['train'].shape)
print(y['train'].shape)

print(X['train'])
print(y['train'])

# create a lstm instance and validation monitor
Example No. 13
from sklearn.metrics import mean_squared_error

from lstm_predictior import generate_data, lstm_model, load_csvdata
from weathercsvparser import get_data, print_tab

warnings.filterwarnings("ignore")

LOG_DIR = 'resources/logs/'
TIMESTEPS = 1
RNN_LAYERS = [{'num_units': 400}]
DENSE_LAYERS = None
TRAINING_STEPS = 5000
PRINT_STEPS = TRAINING_STEPS # / 10
BATCH_SIZE = 100

regressor = SKCompat(learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),))
                          #   model_dir=LOG_DIR)

#X, y = generate_data(np.sin, np.linspace(0, 100, 10000, dtype=np.float32), TIMESTEPS, seperate=False)
X, y = load_csvdata(TIMESTEPS, seperate=False)

#noise_train = np.asmatrix(np.random.normal(0,0.2,len(y['train'])),dtype = np.float32)
#noise_val = np.asmatrix(np.random.normal(0,0.2,len(y['val'])),dtype = np.float32)
#noise_test = np.asmatrix(np.random.normal(0,0.2,len(y['test'])),dtype = np.float32) #asmatrix

#noise_train = np.transpose(noise_train)
#noise_val = np.transpose(noise_val)
#noise_test = np.transpose(noise_test)


#y['train'] = np.add( y['train'],noise_train)
Example No. 14
def main(unused_arguments):
    mnist_classifier = SKCompat(
        learn.Estimator(model_fn=trainTypes,
                        model_dir="/Users/praneet/Downloads/model"))
    print('Predictions: ')
    evallst = []
    outputlst = []
    fileCount = 1
    evaluatePath = "/Users/praneet/Downloads/test/"
    lst = []
    lst.append("image_name")
    lst.append("Type_1")
    lst.append("Type_2")
    lst.append("Type_3")
    evallst.append(lst)
    outputlst.append(lst)
    for root, dirs, files in os.walk(evaluatePath):
        for fileName in files:
            if fileName.endswith(".jpg"):
                eval_data = []
                filePath = os.path.abspath(os.path.join(root, fileName))
                img = cv2.imread(filePath)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = processImage(img)
                fixedSize = (256, 256)
                img = cv2.resize(img, dsize=fixedSize)
                img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
                tmp_data = img.astype(np.float32) / 255.0
                eval_data.append(tmp_data)
                eval_data = np.array(eval_data)
                # Predict values for each image
                predictions = mnist_classifier.predict(x=eval_data)
                print(fileName, predictions)
                lst = []
                lst.append(fileName)
                for x in predictions['probabilities']:
                    for y in x:
                        lst.append(y)
                outputlst.append(lst)
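                # Collapse the probability row into a one-hot row once a class clears 0.5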
                if lst[2] > 0.5:
                    lst = []
                    lst.append(fileName)
                    lst.append(0)
                    lst.append(1)
                    lst.append(0)
                if lst[3] > 0.5:
                    lst = []
                    lst.append(fileName)
                    lst.append(0)
                    lst.append(0)
                    lst.append(1)
                if lst[1] > 0.5:
                    lst = []
                    lst.append(fileName)
                    lst.append(1)
                    lst.append(0)
                    lst.append(0)
                if lst[1] == 0 or lst[1] == 1:
                    print("Non Ambiguous Prediction")
                evallst.append(lst)
                fileCount += 1
    print('Total files: ', fileCount)
    df = pd.DataFrame(evallst)
    df.to_csv('output_normalized_check.csv', index=False, header=False)
    df = pd.DataFrame(outputlst)
    df.to_csv('output_integers_check.csv', index=False, header=False)
from tensorflow.contrib.learn.python import SKCompat
from sklearn.metrics import mean_squared_error as MSE

from . import lstmTimeSeriesStacked as l
warnings.filterwarnings("ignore")

LOG_DIR = 'resources/logs/'
TIMESTEPS = 1
RNN_LAYERS = [{'num_units': 500}]
DENSE_LAYERS = 4
TRAINING_STEPS = 500
PRINT_STEPS = TRAINING_STEPS / 10
BATCH_SIZE = 100

regressor = SKCompat(
    learn.Estimator(model_fn=l.lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                    model_dir=LOG_DIR))
X, y = l.Sgenerate_data(np.sin,
                        np.linspace(0, 100, 10000, dtype=np.float32),
                        TIMESTEPS,
                        seperate=False)
noise_train = np.asmatrix(np.random.normal(0, 0.2, len(y['train'])),
                          dtype=np.float32)
noise_val = np.asmatrix(np.random.normal(0, 0.2, len(y['val'])),
                        dtype=np.float32)
noise_test = np.asmatrix(np.random.normal(0, 0.2, len(y['test'])),
                         dtype=np.float32)

y['train'] = np.add(y['train'], noise_train)
y['test'] = np.add(y['test'], noise_test)
y['val'] = np.add(y['val'], noise_val)
from pandas import concat

# from lstm_predictor import generate_data, lstm_model

warnings.filterwarnings("ignore")

LOG_DIR = 'resources/logs/'
TIMESTEPS = 1
RNN_LAYERS = [{'num_units': 400}]
DENSE_LAYERS = None
TRAINING_STEPS = 3000
PRINT_STEPS = TRAINING_STEPS  # / 10
BATCH_SIZE = 1

regressor = SKCompat(
    learn.Estimator(model_fn=predictor.lstm_model(TIMESTEPS, RNN_LAYERS,
                                                  DENSE_LAYERS), ))
#   model_dir=LOG_DIR)
from pandas import read_csv
from pandas import Series

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

series = read_csv(
    '../CorpData/InventoryHistory/2010_2018_books_sortable inventory.csv',
    header=0,
    parse_dates=[0],
    index_col=0,
    squeeze=True,
    usecols=[0, 4])
Example No. 17
fcDim = [10**exp, 10**exp, 10**exp]  #, 10**exp, 10**exp]
TRAINING_STEPS = 10**3
PRINT_STEPS = TRAINING_STEPS / 10
BATCH_SIZE = 100

x, y = genData(np.sin, np.linspace(0, 10**2, 10**4, dtype=np.float32),
               TIMESTEPS)

# create a lstm instance and validation monitor
validation_monitor = learn.monitors.ValidationMonitor(x['val'], y['val'])

# Get the RNN model
model = SKCompat(
    learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, fcDim)))

# TRAIN START HERE ===========================================
model.fit(x['train'],
          y['train'],
          monitors=[validation_monitor],
          batch_size=BATCH_SIZE,
          steps=TRAINING_STEPS)

# TEST START HERE ===========================================
predicted = np.asmatrix(model.predict(x['test']), dtype=np.float32)

# Analyse Test Result
rmse = np.sqrt((np.asarray(predicted - y['test']) ** 2).mean())
score = mean_squared_error(predicted, y['test'])
nmse = score / np.var(y['test'])
Example No. 18
        output = dnn_layers(output[-1], dense_layers)
        prediction, loss = tflearn.models.linear_regression(output, y)
        train_op = tf.contrib.layers.optimize_loss(
            loss,
            tf.contrib.framework.get_global_step(),
            optimizer=optimizer,
            learning_rate=learning_rate)
        return prediction, loss, train_op

    return _lstm_model


# regressor = learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS))

regressor = SKCompat(
    learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS,
                                        DENSE_LAYERS)))

df = pd.read_csv("data/elec_load.csv", error_bad_lines=False)
plt.subplot()
plot_test, = plt.plot(df.values[:1500], label='Load')
plt.legend(handles=[plot_test])

print(df.describe())
array = (df.values - 147.0) / 339.0  # shift/scale constants taken from df.describe()
plt.subplot()
plot_test, = plt.plot(array[:1500], label='Normalized Load')
plt.legend(handles=[plot_test])

listX = []
listy = []
Example No. 19
import tensorflow as tf
import numpy as np
from tensorflow.contrib.learn.python import SKCompat

iris = np.loadtxt('Data/Data/iris_softmax.csv',
                  delimiter=',',
                  dtype=np.float32)

x = iris[:, :-3]
y = iris[:, -3:]
y = np.argmax(y, axis=1)

feature_columns = [tf.feature_column.numeric_column('', shape=[5])]

clf = tf.contrib.learn.DNNClassifier(hidden_units=[10, 20, 5],
                                     feature_columns=feature_columns,
                                     n_classes=3)

clf = SKCompat(clf)

clf.fit(x=x, y=y, max_steps=1000)

print(clf.score(x=x, y=y))
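The wrapped DNNClassifier follows the same dict convention as the Estimator examples above: its predict output should be keyed by 'classes' and 'probabilities'. A short usage sketch:

preds = clf.predict(x=x)
print(preds['classes'][:10])        # predicted labels
print(preds['probabilities'][:2])   # per-class softmax probabilities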
Example No. 20
class Cloud_Forecaster(Base_Forecaster):
    '''
    Description: Cloud cover forecast based on LSTM - RNN
    '''
    def __init__(self, raw_data, configs, mode, scale=1, outpath='../outputs'):
        '''
        scale: relationship bet. time steps and data interval (1/60)
        '''

        logging.info('Initializing Cloud Forecaster ...')

        super(Cloud_Forecaster, self).__init__(raw_data, configs, outpath)

        # load configurations
        self.scale = scale
        self.mode = mode

        logging.info('Cloud Forecaster is initialized.')

    def fit(self, datakey='total_cloud_fraction', ubd_min=8, lbd_max=15):

        logging.info('Start fitting data ...')

        # create tensorflow model
        self._init_model()

        # preprocess data
        ccp = Cloud_Cover_Preprocessor(self.raw_data,
                                       self.configs['time_steps'], datakey,
                                       self.scale, ubd_min, lbd_max, self.mode)
        self.feats, self.labels = ccp.preprocess()
        self._fit()

        logging.info('Fitting data is complete.')

    def _init_model(self):

        logging.info('Initializing LSTM model ...')

        self.regressor = SKCompat(
            learn.Estimator(model_fn=lstm_model(
                self.configs['time_steps'],
                self.configs['rnn_layers'],
                dense_layers=self.configs['dense_layers'],
                learning_rate=self.configs['learning_rate'])))

        logging.info('LSTM model is initialized.')

    def _fit(self):

        logging.info('Fitting training data ...')

        # adjust matrix dimensions
        x_train = np.expand_dims(self.feats['train'], axis=2)
        y_train = np.expand_dims(self.labels['train'], axis=2)

        logging.debug('Shape of x_train is: %s' % str(x_train.shape))
        logging.debug('Shape of y_train is: %s' % str(y_train.shape))

        # start training
        self.regressor.fit(x_train,
                           y_train,
                           batch_size=self.configs['batch_size'],
                           steps=self.configs['training_steps'])

        logging.info('Training data is fitted.')

    def get_test_score(self):

        logging.info('Testing on test data sets ...')

        x_test = np.expand_dims(self.feats['test'], axis=2)

        # start prediction
        preds = self.regressor.predict(x_test)

        # calculate MSE (the square root returned below is the RMSE)
        mse = mean_squared_error(self.labels['test'], preds)

        rst_dict = {'preds': preds, 'labels': self.labels['test']}

        sio.savemat('../outputs/data_%s_fmt_%s.mat' %\
                    (self.configs['data_name'], self.mode),
                    rst_dict)

        logging.info('Testing is completed.')

        return np.sqrt(mse)
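A usage sketch for the class above; the configs keys mirror the ones _init_model and _fit actually read, while the values and the raw-data loader are hypothetical:

configs = {
    'time_steps': 10,
    'rnn_layers': [{'num_units': 10}],
    'dense_layers': [10, 10],
    'learning_rate': 0.01,
    'batch_size': 100,
    'training_steps': 1000,
    'data_name': 'demo',
}
raw_data = load_raw_data()  # hypothetical loader; returns whatever Base_Forecaster expects
forecaster = Cloud_Forecaster(raw_data, configs, mode='train')
forecaster.fit()
print('Test RMSE: %f' % forecaster.get_test_score())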
Example No. 21
def main():
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)
    label_dict = {}
    file_train_data = []
    file_train_label = []
    train_data = []
    train_label = []
    filecount = 0
    labels = {
        'agriculture': 0,
        'artisinal_mine': 1,
        'bare_ground': 2,
        'blooming': 3,
        'blow_down': 4,
        'clear': 5,
        'cloudy': 6,
        'conventional_mine': 7,
        'cultivation': 8,
        'habitation': 9,
        'haze': 10,
        'partly_cloudy': 11,
        'primary': 12,
        'road': 13,
        'selective_logging': 14,
        'slash_burn': 15,
        'water': 16
    }
    df = pd.read_csv('train_v2.csv', header=None, dtype=object)
    for x in df.as_matrix()[1:]:
        label_lst = []
        for y in str(x[1]).split(' '):
            label_lst.append(labels[y])
        label_dict[x[0]] = label_lst
    explore_path = "/Users/praneet/Documents/Kaggle/Amazon/train"
    for root, dirs, files in os.walk(explore_path):
        for file_name in files:
            if file_name.endswith(".jpg"):
                filecount += 1
                print(file_name)
                file_path = os.path.abspath(os.path.join(root, file_name))
                img = cv2.imread(file_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                fixed_size = (56, 56)
                img = cv2.resize(img, dsize=fixed_size)
                file_title = file_name.split('.')[0]
                file_train_data.append(img)
                file_train_label.append(label_dict[file_title])
    print(filecount)
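    # Expand each multi-label image into one (image, label) pair per tag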
    for x in range(len(file_train_label)):
        for y in range(len(file_train_label[x])):
            train_data.append(file_train_data[x])
            train_label.append(file_train_label[x][y])
    del file_train_data
    del file_train_label
    gc.collect()
    print(len(train_data))
    print(len(train_label))
    train_data = np.array(train_data, dtype=np.float32) / 255.
    train_label = np.array(train_label, dtype=np.int32)
    # Create the Estimator
    classifier = SKCompat(
        learn.Estimator(model_fn=train,
                        model_dir="/Users/praneet/Downloads/model"))
    # Set up logging for predictions
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)
    # Train the model
    classifier.fit(x=train_data,
                   y=train_label,
                   batch_size=100,
                   steps=1750,
                   monitors=[logging_hook])
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)
def main(unused_arguments):
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)
    content = np.loadtxt('train_data')
    train_data = []
    temp = 0
    tmp = []
    for x in content:
        if temp == 255:
            tmp.append(x)
            train_data.append(tmp)
            temp = 0
            tmp = []
        else:
            tmp.append(x)
            temp += 1
    with open('train_labels', 'rb') as fp:
        train_labels = pickle.load(fp)
    train_data = np.array(train_data, dtype=np.float32)
    train_labels = np.array(train_labels, dtype=np.int32)
    # Create the Estimator
    mnist_classifier = SKCompat(
        learn.Estimator(model_fn=trainTypes,
                        model_dir="/Users/praneet/Downloads/model"))
    # Set up logging for predictions
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=50)
    # Train the model
    mnist_classifier.fit(x=train_data,
                         y=train_labels,
                         batch_size=100,
                         steps=2500,
                         monitors=[logging_hook])
    # Accuracy metric for evaluation
    metrics = {
        "accuracy":
        learn.MetricSpec(metric_fn=tf.metrics.accuracy,
                         prediction_key="classes"),
    }
    # Evaluation
    print('Evaluation: ')
    content = np.loadtxt('eval_data')
    eval_data = []
    temp = 0
    tmp = []
    for x in content:
        if temp == 255:
            tmp.append(x)
            eval_data.append(tmp)
            temp = 0
            tmp = []
        else:
            tmp.append(x)
            temp += 1
    with open('eval_labels', 'rb') as fp:
        eval_labels = pickle.load(fp)
    eval_data = np.array(eval_data, dtype=np.float32)
    eval_labels = np.array(eval_labels, dtype=np.int32)
    # Evaluate the model
    eval_results = mnist_classifier.score(x=eval_data,
                                          y=eval_labels,
                                          metrics=metrics)
    print(eval_results)
    # Predictions
    print('Predictions: ')
    evallst = []
    outputlst = []
    fileCount = 1
    evaluatePath = "/Users/praneet/Downloads/test/"
    lst = []
    lst.append("image_name")
    lst.append("Type_1")
    lst.append("Type_2")
    lst.append("Type_3")
    evallst.append(lst)
    outputlst.append(lst)
    for root, dirs, files in os.walk(evaluatePath):
        for fileName in files:
            if fileName.endswith(".jpg"):
                eval_data = []
                filePath = os.path.abspath(os.path.join(root, fileName))
                img = cv2.imread(filePath)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = processImage(img)
                fixedSize = (256, 256)
                img = cv2.resize(img, dsize=fixedSize)
                img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
                tmp_data = img.astype(np.float32) / 255.0
                eval_data.append(tmp_data)
                eval_data = np.array(eval_data)
                # Predict values for each image
                predictions = mnist_classifier.predict(x=eval_data)
                print(fileName, predictions)
                lst = []
                lst.append(fileName)
                for x in predictions['probabilities']:
                    for y in x:
                        lst.append(y)
                outputlst.append(lst)
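                # Collapse the probability row into a one-hot row using per-class thresholds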
                if lst[1] > 0.25:
                    lst = []
                    lst.append(fileName)
                    lst.append(1)
                    lst.append(0)
                    lst.append(0)
                if lst[3] > 0.29:
                    lst = []
                    lst.append(fileName)
                    lst.append(0)
                    lst.append(0)
                    lst.append(1)
                if lst[2] > 0.58:
                    lst = []
                    lst.append(fileName)
                    lst.append(0)
                    lst.append(1)
                    lst.append(0)
                if lst[1] == 0 or lst[1] == 1:
                    print("Non Ambiguous Prediction")
                evallst.append(lst)
                fileCount += 1
    print('Total files: ', fileCount)
    df = pd.DataFrame(evallst)
    df.to_csv('output_normalized.csv', index=False, header=False)
    df = pd.DataFrame(outputlst)
    df.to_csv('output_integers.csv', index=False, header=False)
    localtime = time.asctime(time.localtime(time.time()))
    print(localtime)