Example no. 1
        # Add per-pixel Gaussian noise to each input array and clip the
        # result back into the valid [0, 1] pixel range.
        for target_array in target_arrays:
            noise = np.random.randn(*target_array.shape).astype('float32') * std
            new_target_arrays.append(np.clip(target_array + noise, 0, 1))

        new_target_arrays.append(labels)

        yield new_target_arrays, chunk_size
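
The fragment above starts mid-generator. A minimal self-contained sketch of the enclosing generator, assuming data_gen yields (target_arrays, chunk_size) batches with the labels array last (the wrapper name and the std default are assumptions, not taken from the source):

import numpy as np

def gaussian_noise_gen(data_gen, std=0.1):
    # Hypothetical wrapper: perturb every input array with Gaussian noise,
    # pass the labels array (last element) through unchanged.
    for target_arrays, chunk_size in data_gen:
        target_arrays = list(target_arrays)
        labels = target_arrays.pop()
        new_target_arrays = []
        for target_array in target_arrays:
            noise = np.random.randn(*target_array.shape).astype('float32') * std
            new_target_arrays.append(np.clip(target_array + noise, 0, 1))
        new_target_arrays.append(labels)
        yield new_target_arrays, chunk_size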


### Alternative image loader and processor which does pysex centering

# pysex_params_train = load_data.load_gz("data/pysex_params_extra_train.npy.gz")
# pysex_params_test = load_data.load_gz("data/pysex_params_extra_test.npy.gz")


pysex_params_train = load_data.load_gz("data/pysex_params_gen2_train.npy.gz")
pysex_params_test = load_data.load_gz("data/pysex_params_gen2_test.npy.gz")

pysexgen1_params_train = load_data.load_gz("data/pysex_params_extra_train.npy.gz")
pysexgen1_params_test = load_data.load_gz("data/pysex_params_extra_test.npy.gz")


center_x, center_y = (IMAGE_WIDTH - 1) / 2.0, (IMAGE_HEIGHT - 1) / 2.0

# def build_pysex_center_transform(img_index, subset='train'):
#     if subset == 'train':
#         x, y, a, b, theta, flux_radius, kron_radius, petro_radius, fwhm = pysex_params_train[img_index]
#     elif subset == 'test':
#         x, y, a, b, theta, flux_radius, kron_radius, petro_radius, fwhm = pysex_params_test[img_index]

#     return build_augmentation_transform(translation=(x - center_x, y - center_y))  
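
The commented-out transform above shows the idea: translate each image so the pysex-detected object centre lands on the geometric image centre. A working sketch under the same assumptions (build_augmentation_transform and the nine-value parameter rows come from the surrounding code and are not redefined here):

def build_pysex_center_transform(img_index, subset='train'):
    # Shift the augmentation so the detected object centre (x, y) moves
    # to the geometric image centre (center_x, center_y).
    params = pysex_params_train if subset == 'train' else pysex_params_test
    x, y, a, b, theta, flux_radius, kron_radius, petro_radius, fwhm = params[img_index]
    return build_augmentation_transform(translation=(x - center_x, y - center_y))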
Example no. 2
        "Creates a gzipped CSV submission file from a gzipped numpy file with testset predictions."
    )
    print("Usage: create_submission_from_npy.py <input.npy.gz>")
    sys.exit()

src_path = sys.argv[1]
src_dir = os.path.dirname(src_path)
src_filename = os.path.basename(src_path)
tgt_filename = src_filename.replace(".npy.gz", ".csv")
tgt_path = os.path.join(src_dir, tgt_filename)

test_ids = load_data.test_ids

print("Loading %s" % src_path)

data = load_data.load_gz(src_path)
assert data.shape[0] == load_data.num_test

print("Saving %s" % tgt_path)

with open(tgt_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)  # , delimiter=',', quoting=csv.QUOTE_MINIMAL)

    # write header
    writer.writerow([
        'GalaxyID', 'Class1.1', 'Class1.2', 'Class1.3', 'Class2.1', 'Class2.2',
        'Class3.1', 'Class3.2', 'Class4.1', 'Class4.2', 'Class5.1', 'Class5.2',
        'Class5.3', 'Class5.4', 'Class6.1', 'Class6.2', 'Class7.1', 'Class7.2',
        'Class7.3', 'Class8.1', 'Class8.2', 'Class8.3', 'Class8.4', 'Class8.5',
        'Class8.6', 'Class8.7', 'Class9.1', 'Class9.2', 'Class9.3',
        'Class10.1', 'Class10.2', 'Class10.3', 'Class11.1', 'Class11.2',
        'Class11.3', 'Class11.4', 'Class11.5', 'Class11.6'
    ])
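
    # The snippet breaks off after the header. A plausible completion
    # (row layout assumed from the header and the shape assert above)
    # writes one row per test galaxy: GalaxyID, then 37 probabilities.
    for k in range(load_data.num_test):
        writer.writerow([test_ids[k]] + list(data[k]))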
Example no. 3
          (sbcuda.cuda_ndarray.cuda_ndarray.mem_info()[0] / 1024. / 1024.))


def save_exit():
    # winsol.save()
    print "Done!"
    print ' run for %s' % timedelta(seconds=(time.time() - start_time))
    sys.exit(0)


if not REPREDICT_EVERYTIME and os.path.isfile(
        target_path_valid) and os.path.isfile(TRAIN_LOSS_SF_PATH):
    print 'Loading validation predictions from %s and loss from %s ' % (
        target_path_valid, TRAIN_LOSS_SF_PATH)
    predictions = load_data.load_gz(target_path_valid)
else:
    try:
        print ''
        print 'Re-evaluating and predicting'

        if DO_VALID:
            evalHist = winsol.evaluate([xs_valid[0], xs_valid[1]],
                                       y_valid=y_valid)
            winsol.save_loss(modelname='model_norm_metrics')
            evalHist = winsol.load_loss(modelname='model_norm_metrics')

            print ''
            predictions = winsol.predict([xs_valid[0], xs_valid[1]])

            print "Write predictions to %s" % target_path_valid
valid_ids = train_ids[num_train:]
train_ids = train_ids[:num_train]

train_indices = np.arange(num_train)
valid_indices = np.arange(num_train, num_train + num_valid)
test_indices = np.arange(num_test)



# paths of all the files to blend.
predictions_test_paths = glob.glob(os.path.join(predictions_test_dir, "*.npy.gz"))
predictions_valid_paths = [os.path.join(predictions_valid_dir, os.path.basename(path)) for path in predictions_test_paths]

print "Loading validation set predictions"
predictions_list = [load_data.load_gz(path) for path in predictions_valid_paths]
predictions_stack = np.array(predictions_list).astype(theano.config.floatX) # num_sources x num_datapoints x 37
del predictions_list
print

print "Compute individual prediction errors"
individual_prediction_errors = np.sqrt(((predictions_stack - y_valid[None])**2).reshape(predictions_stack.shape[0], -1).mean(1))
print

print "Compiling Theano functions"
X = theano.shared(predictions_stack) # source predictions
t = theano.shared(y_valid) # targets

W = T.vector('W')

Example no. 6
train_indices = np.arange(num_train)
valid_indices = np.arange(num_train, num_train + num_valid)
test_indices = np.arange(num_test)

# paths of all the files to blend.
predictions_test_paths = glob.glob(
    os.path.join(predictions_test_dir, "*.npy.gz"))
predictions_valid_paths = [
    os.path.join(predictions_valid_dir, os.path.basename(path))
    for path in predictions_test_paths
]

print("Loading validation set predictions")
predictions_list = [
    load_data.load_gz(path) for path in predictions_valid_paths
]
predictions_stack = np.array(predictions_list).astype(
    theano.config.floatX)  # num_sources x num_datapoints x 37
del predictions_list
print()

print("Compute individual prediction errors")
individual_prediction_errors = np.sqrt(
    ((predictions_stack - y_valid[None])**2).reshape(
        predictions_stack.shape[0], -1).mean(1))
print()

print("Compiling Theano functions")
X = theano.shared(predictions_stack)  # source predictions
t = theano.shared(y_valid)  # targets
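
Both blending snippets stop right after the shared variables are set up. A minimal sketch of the objective such a blend typically minimizes (the normalized weighted average, the RMSE error, and the compiled functions are assumptions about what follows, not the source's code):

import theano
import theano.tensor as T

W = T.vector('W')  # one blend weight per source model

# Weighted average over sources: (num_sources, n, 37) -> (n, 37).
weighted_avg = T.sum(X * W.dimshuffle(0, 'x', 'x'), axis=0) / T.sum(W)
error = T.sqrt(T.mean((weighted_avg - t) ** 2))  # RMSE against the targets
grad = T.grad(error, W)

f_error = theano.function([W], error)
f_grad = theano.function([W], grad)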
Example no. 7
import load_data

output_names = [
    "smooth", "featureOrdisk", "NoGalaxy", "EdgeOnYes", "EdgeOnNo", "BarYes",
    "BarNo", "SpiralYes", "SpiralNo", "BulgeNo", "BulgeJust", "BulgeObvious",
    "BulgDominant", "OddYes", "OddNo", "RoundCompletly", "RoundBetween",
    "RoundCigar", "Ring", "Lense", "Disturbed", "Irregular", "Other", "Merger",
    "DustLane", "BulgeRound", "BlulgeBoxy", "BulgeNo2", "SpiralTight",
    "SpiralMedium", "SpiralLoose", "Spiral1Arm", "Spiral2Arm", "Spiral3Arm",
    "Spiral4Arm", "SpiralMoreArms", "SpiralCantTell"
]

#d = pd.read_csv(TRAIN_LABELS_PATH)
#targets = d.as_matrix()[1:, 1:].astype('float32')

targets = load_data.load_gz(
    'predictions/final/augmented/valid/try_convent_continueAt0p02_next.npy.gz')

targets = targets.T  # transpose to (37, num_samples): one row per output

output_corr = np.zeros((37, 37))
print targets.shape
for i in xrange(0, 37):
    for j in xrange(i, 37):
        output_corr[i][j] = np.corrcoef(targets[i], targets[j])[0][1]
        if i != j and np.abs(output_corr[i][j]) > 0.3:
            if np.abs(output_corr[i][j]) > 0.7:
                print colored(
                    "%s, %s: %s" %
                    (output_names[i], output_names[j], output_corr[i][j]),
                    'green')
            else:
                # moderate correlation (0.3 < |r| <= 0.7): print uncolored
                print "%s, %s: %s" % (
                    output_names[i], output_names[j], output_corr[i][j])
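
As an aside, the double loop above can be replaced by a single call, since np.corrcoef treats each row of its (37, N) input as one variable:

# Vectorized equivalent of the loop (fills both triangles of the matrix).
output_corr_full = np.corrcoef(targets)  # shape (37, 37)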
    print "Usage: create_submission_from_npy.py <input.npy.gz>"
    sys.exit()

src_path = sys.argv[1]
src_dir = os.path.dirname(src_path)
src_filename = os.path.basename(src_path)
tgt_filename = src_filename.replace(".npy.gz", ".csv")
tgt_path = os.path.join(src_dir, tgt_filename)


test_ids = load_data.test_ids


print "Loading %s" % src_path

data = load_data.load_gz(src_path)
assert data.shape[0] == load_data.num_test

print "Saving %s" % tgt_path

with open(tgt_path, "wb") as csvfile:
    writer = csv.writer(csvfile)  # , delimiter=',', quoting=csv.QUOTE_MINIMAL)

    # write header
    writer.writerow(
        [
            "GalaxyID",
            "Class1.1",
            "Class1.2",
            "Class1.3",
            "Class2.1",