예제 #1
        for target_array in target_arrays:
            noise = np.random.randn(*target_array.shape).astype('float32') * std
            new_target_arrays.append(np.clip(target_array + noise, 0, 1))


        yield new_target_arrays, chunk_size

### Alternative image loader and processor which does pysex centering

# pysex_params_train = load_data.load_gz("data/pysex_params_extra_train.npy.gz")
# pysex_params_test = load_data.load_gz("data/pysex_params_extra_test.npy.gz")

# pysex parameter tables from the "gen2" files, one per subset.
pysex_params_train, pysex_params_test = (
    load_data.load_gz("data/pysex_params_gen2_train.npy.gz"),
    load_data.load_gz("data/pysex_params_gen2_test.npy.gz"),
)

# The "extra" parameter files stay available under separate gen1 names.
pysexgen1_params_train, pysexgen1_params_test = (
    load_data.load_gz("data/pysex_params_extra_train.npy.gz"),
    load_data.load_gz("data/pysex_params_extra_test.npy.gz"),
)

# Centre of an IMAGE_WIDTH x IMAGE_HEIGHT image when pixel coordinates
# start at 0 (hence the "- 1"); dividing by 2.0 keeps the result a float.
center_x = (IMAGE_WIDTH - 1) / 2.0
center_y = (IMAGE_HEIGHT - 1) / 2.0

# def build_pysex_center_transform(img_index, subset='train'):
#     if subset == 'train':
#         x, y, a, b, theta, flux_radius, kron_radius, petro_radius, fwhm = pysex_params_train[img_index]
#     elif subset == 'test':
#         x, y, a, b, theta, flux_radius, kron_radius, petro_radius, fwhm = pysex_params_test[img_index]

#     return build_augmentation_transform(translation=(x - center_x, y - center_y))  
예제 #2
        "Creates a gzipped CSV submission file from a gzipped numpy file with testset predictions."
    print("Usage: create_submission_from_npy.py <input.npy.gz>")

# The first command-line argument is the predictions file to convert.
src_path = sys.argv[1]

# The target file lives next to the source; ".npy.gz" in the file name is
# swapped for ".csv".
src_dir, src_filename = os.path.split(src_path)
tgt_filename = src_filename.replace(".npy.gz", ".csv")
tgt_path = os.path.join(src_dir, tgt_filename)

test_ids = load_data.test_ids

print("Loading %s" % (src_path,))

data = load_data.load_gz(src_path)
# Sanity check: the first dimension must match the number of test entries.
assert data.shape[0] == load_data.num_test

print("Saving %s" % (tgt_path,))

with open(tgt_path, 'wb') as csvfile:
    writer = csv.writer(csvfile)  # , delimiter=',', quoting=csv.QUOTE_MINIMAL)

    # write header
        'GalaxyID', 'Class1.1', 'Class1.2', 'Class1.3', 'Class2.1', 'Class2.2',
        'Class3.1', 'Class3.2', 'Class4.1', 'Class4.2', 'Class5.1', 'Class5.2',
        'Class5.3', 'Class5.4', 'Class6.1', 'Class6.2', 'Class7.1', 'Class7.2',
        'Class7.3', 'Class8.1', 'Class8.2', 'Class8.3', 'Class8.4', 'Class8.5',
        'Class8.6', 'Class8.7', 'Class9.1', 'Class9.2', 'Class9.3',
        'Class10.1', 'Class10.2', 'Class10.3', 'Class11.1', 'Class11.2',
예제 #3
          (sbcuda.cuda_ndarray.cuda_ndarray.mem_info()[0] / 1024. / 1024.))

def save_exit():
    """Report completion and the elapsed run time on stdout.

    The elapsed time is the difference between the current ``time.time()``
    and ``start_time``, which is read from outside this function.  The
    model-saving call is currently disabled (left commented out below), and
    the function does not terminate the process itself.
    """
    # winsol.save()
    print("Done!")
    # presumably start_time was captured with time.time() -- TODO confirm
    elapsed = timedelta(seconds=(time.time() - start_time))
    print(' run for %s' % elapsed)

if not REPREDICT_EVERYTIME and os.path.isfile(
        target_path_valid) and os.path.isfile(TRAIN_LOSS_SF_PATH):
    print 'Loading validation predictions from %s and loss from %s ' % (
        target_path_valid, TRAIN_LOSS_SF_PATH)
    predictions = load_data.load_gz(target_path_valid)
        print ''
        print 'Re-evalulating and predicting'

        if DO_VALID:
            evalHist = winsol.evaluate([xs_valid[0], xs_valid[1]],
            evalHist = winsol.load_loss(modelname='model_norm_metrics')

            print ''
            predictions = winsol.predict([xs_valid[0], xs_valid[1]])

            print "Write predictions to %s" % target_path_valid
예제 #4
        for target_array in target_arrays:
            noise = np.random.randn(*target_array.shape).astype('float32') * std
            new_target_arrays.append(np.clip(target_array + noise, 0, 1))


        yield new_target_arrays, chunk_size

### Alternative image loader and processor which does pysex centering

# pysex_params_train = load_data.load_gz("data/pysex_params_extra_train.npy.gz")
# pysex_params_test = load_data.load_gz("data/pysex_params_extra_test.npy.gz")

# pysex parameter tables from the "gen2" files, one per subset.
pysex_params_train, pysex_params_test = (
    load_data.load_gz("data/pysex_params_gen2_train.npy.gz"),
    load_data.load_gz("data/pysex_params_gen2_test.npy.gz"),
)

# The "extra" parameter files stay available under separate gen1 names.
pysexgen1_params_train, pysexgen1_params_test = (
    load_data.load_gz("data/pysex_params_extra_train.npy.gz"),
    load_data.load_gz("data/pysex_params_extra_test.npy.gz"),
)

# Centre of an IMAGE_WIDTH x IMAGE_HEIGHT image when pixel coordinates
# start at 0 (hence the "- 1"); dividing by 2.0 keeps the result a float.
center_x = (IMAGE_WIDTH - 1) / 2.0
center_y = (IMAGE_HEIGHT - 1) / 2.0

# def build_pysex_center_transform(img_index, subset='train'):
#     if subset == 'train':
#         x, y, a, b, theta, flux_radius, kron_radius, petro_radius, fwhm = pysex_params_train[img_index]
#     elif subset == 'test':
#         x, y, a, b, theta, flux_radius, kron_radius, petro_radius, fwhm = pysex_params_test[img_index]

#     return build_augmentation_transform(translation=(x - center_x, y - center_y))  
# Split the ids: the first num_train entries remain the training set and the
# rest become the validation set.  valid_ids has to be sliced first because
# train_ids is overwritten on the following line.
valid_ids = train_ids[num_train:]
train_ids = train_ids[:num_train]

# Index ranges for each subset; validation indices continue where the
# training indices stop.
train_indices = np.arange(num_train)
valid_indices = np.arange(num_train, num_train + num_valid)
test_indices = np.arange(num_test)

# paths of all the files to blend.
predictions_test_paths = glob.glob(
    os.path.join(predictions_test_dir, "*.npy.gz")
)
# Every test-set prediction file is paired with a validation-set file of the
# same base name in predictions_valid_dir.
predictions_valid_paths = [
    os.path.join(predictions_valid_dir, os.path.basename(path))
    for path in predictions_test_paths
]

print("Loading validation set predictions")
predictions_list = [
    load_data.load_gz(path)
    for path in predictions_valid_paths
]
# num_sources x num_datapoints x 37
predictions_stack = np.array(predictions_list).astype(theano.config.floatX)
del predictions_list  # drop the reference to the per-file list

print("Compute individual prediction errors")
# Root-mean-square error per source: squared differences to y_valid are
# flattened to one row per source and averaged along that row.
individual_prediction_errors = np.sqrt(
    ((predictions_stack - y_valid[np.newaxis]) ** 2)
    .reshape(predictions_stack.shape[0], -1)
    .mean(axis=1)
)

print("Compiling Theano functions")
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(y_valid)  # targets

W = T.vector('W')

예제 #6
# Index ranges for each subset; validation indices continue where the
# training indices stop.
train_indices = np.arange(num_train)
valid_indices = np.arange(num_train, num_train + num_valid)
test_indices = np.arange(num_test)

# paths of all the files to blend.
predictions_test_paths = glob.glob(
    os.path.join(predictions_test_dir, "*.npy.gz"))
# Every test-set prediction file is paired with a validation-set file of the
# same base name in predictions_valid_dir.
# The closing bracket of this comprehension was missing (SyntaxError).
predictions_valid_paths = [
    os.path.join(predictions_valid_dir, os.path.basename(path))
    for path in predictions_test_paths
]

print("Loading validation set predictions")
# The closing bracket of this comprehension was missing as well.
predictions_list = [
    load_data.load_gz(path) for path in predictions_valid_paths
]
predictions_stack = np.array(predictions_list).astype(
    theano.config.floatX)  # num_sources x num_datapoints x 37
del predictions_list  # drop the reference to the per-file list

print("Compute individual prediction errors")
# Root-mean-square error per source: squared differences to y_valid are
# flattened to one row per source and averaged along that row.
individual_prediction_errors = np.sqrt(
    ((predictions_stack - y_valid[None])**2).reshape(
        predictions_stack.shape[0], -1).mean(1))

print("Compiling Theano functions")
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(y_valid)  # targets
예제 #7
import load_data

# Human-readable labels for the 37 output columns, in column order.
# NOTE(review): several labels look misspelled ("BulgDominant",
# "RoundCompletly", "Lense", "BlulgeBoxy"); they are left untouched because
# they are runtime strings that other code or saved output may depend on.
output_names = [
    "smooth", "featureOrdisk", "NoGalaxy", "EdgeOnYes", "EdgeOnNo", "BarYes",
    "BarNo", "SpiralYes", "SpiralNo", "BulgeNo", "BulgeJust", "BulgeObvious",
    "BulgDominant", "OddYes", "OddNo", "RoundCompletly", "RoundBetween",
    "RoundCigar", "Ring", "Lense", "Disturbed", "Irregular", "Other", "Merger",
    "DustLane", "BulgeRound", "BlulgeBoxy", "BulgeNo2", "SpiralTight",
    "SpiralMedium", "SpiralLoose", "Spiral1Arm", "Spiral2Arm", "Spiral3Arm",
    "Spiral4Arm", "SpiralMoreArms", "SpiralCantTell",
]  # the list literal was previously left unterminated (SyntaxError)

#d = pd.read_csv(TRAIN_LABELS_PATH)
#targets = d.as_matrix()[1:, 1:].astype('float32')

targets = load_data.load_gz(

targets = targets.T

output_corr = np.zeros((37, 37))
print targets.shape
for i in xrange(0, 37):
    for j in xrange(i, 37):
        output_corr[i][j] = np.corrcoef(targets[i], targets[j])[0][1]
        if i != j and np.abs(output_corr[i][j]) > 0.3:
            if np.abs(output_corr[i][j]) > 0.7:
                print colored(
                    "%s, %s: %s" %
                    (output_names[i], output_names[j], output_corr[i][j]),
    print "Usage: create_submission_from_npy.py <input.npy.gz>"

# The first command-line argument is the predictions file to convert.
src_path = sys.argv[1]

# The target file lives next to the source; ".npy.gz" in the file name is
# swapped for ".csv".
src_dir, src_filename = os.path.split(src_path)
tgt_filename = src_filename.replace(".npy.gz", ".csv")
tgt_path = os.path.join(src_dir, tgt_filename)

test_ids = load_data.test_ids

# A parenthesised single argument prints identically under the Python 2
# print statement and the print function.
print("Loading %s" % (src_path,))

data = load_data.load_gz(src_path)
# Sanity check: the first dimension must match the number of test entries.
assert data.shape[0] == load_data.num_test

print("Saving %s" % (tgt_path,))

with open(tgt_path, "wb") as csvfile:
    writer = csv.writer(csvfile)  # , delimiter=',', quoting=csv.QUOTE_MINIMAL)

    # write header