def get_catalog():
    """Load and return the unpickled catalog

    When ``running_on_cloud()`` is true, the pickle is read from the object
    returned by ``bucket.getf('catalog.pkl')``. Otherwise it is read from
    ``data/catalog.pkl`` in the directory containing this module.

    Returns
    -------
    The object stored in the catalog pickle
    """
    # NOTE(security): unpickling can execute arbitrary code. Only load
    # catalog files that come from a trusted source.
    if running_on_cloud():
        return pickle.load(bucket.getf('catalog.pkl'))

    pth = os.path.join(os.path.dirname(__file__), 'data', 'catalog.pkl')
    # pickle needs a binary file object (text mode fails on Python 3), and
    # the context manager closes the handle that was previously leaked.
    with open(pth, 'rb') as infile:
        return pickle.load(infile)
def negatives_iterator(self, samples_per_field=None):
    """Yield an infinite sequence of offset stamp parameters

    Candidate stamps are drawn at random around a field longitude obtained
    from ``self._random_field()``, with radii sampled from the catalog.
    Candidates flagged by ``overlap`` against the catalog entries are
    discarded before anything is yielded.

    Parameters
    ----------
    samples_per_field : int (optional)
        How many samples to generate before switching longitudes by
        >1 deg. Tuning this can improve IO performance. When omitted,
        30000 is used if ``running_on_cloud()`` is true and 500 otherwise.

    Yields
    ------
    An infinite sequence of (field_lon, lcen, bcen, rad)
    """
    catalog = get_catalog()
    # Columns 0, 1 and 3 are used as the catalog l, b and radius values.
    cat_l, cat_b, cat_r = catalog[:, 0], catalog[:, 1], catalog[:, 3]

    while True:
        lon = self._random_field()

        if samples_per_field is not None:
            count = samples_per_field
        elif running_on_cloud():
            count = 30000
        else:
            count = 500

        # Draw order (l, then b, then r) matters for RNG reproducibility.
        cand_l = np.random.uniform(-.5, .5, count) + lon
        cand_b = np.random.uniform(-.8, .8, count)
        cand_r = np.random.choice(cat_r, size=count)

        keep = ~overlap(cand_l, cand_b, cand_r, cat_l, cat_b, cat_r)
        cand_l, cand_b, cand_r = cand_l[keep], cand_b[keep], cand_r[keep]

        for lcen, bcen, rad in zip(cand_l, cand_b, cand_r):
            yield lon, lcen, bcen, rad
def _has_field(lon):
    """Return True if image data exists for a given longitude

    Both the ``*_i4.fits`` and ``*_mips.fits`` files must be present. They
    are looked up in the bucket when ``running_on_cloud()`` is true, and
    under ``data/galaxy/registered`` next to this module otherwise.
    """
    on_cloud = running_on_cloud()
    i4_name = "%3.3i_i4.fits" % lon
    mips_name = "%3.3i_mips.fits" % lon

    if on_cloud:
        return bucket.exists(i4_name) and bucket.exists(mips_name)

    registered_dir = os.path.join(os.path.dirname(__file__),
                                  'data', 'galaxy', 'registered')
    i4_path = os.path.join(registered_dir, i4_name)
    mips_path = os.path.join(registered_dir, mips_name)
    return os.path.exists(i4_path) and os.path.exists(mips_path)
def get_field(lon):
    """Create and return a new field appropriate for running locally
    or on PiCloud

    The previous return value is cached in the module-level
    ``_cached_field``, to avoid repeated I/O for repeated requests for the
    same field.

    Parameters
    ----------
    lon : Longitude of the requested field. It is compared against the
          ``lon`` attribute of the cached field.

    Returns
    -------
    A ``CloudField`` when ``running_on_cloud()`` is true, else a ``Field``
    """
    global _cached_field

    if _cached_field is not None and _cached_field.lon == lon:
        return _cached_field

    # Release the stale field before building the replacement, so that it
    # can be garbage-collected if nothing else references it. Rebinding to
    # None (instead of `del`) keeps the global defined: if the constructor
    # below raises, later calls still work instead of hitting a NameError.
    _cached_field = None

    logging.getLogger(__name__).debug("Loading a new field at l=%i", lon)

    field_cls = CloudField if running_on_cloud() else Field
    _cached_field = field_cls(lon)
    return _cached_field
def random_iterator(self, samples_per_field=None):
    """Yield an infinite sequence of random stamp parameters

    Each batch comes from the ``random_stamps`` method of the field
    returned by ``get_field`` for a longitude chosen by
    ``self._random_field()``.

    Parameters
    ----------
    samples_per_field : int (optional)
        How many samples to generate before switching longitudes by
        >1 deg. Tuning this can improve IO performance. When omitted,
        30000 is used if ``running_on_cloud()`` is true and 500 otherwise.

    Yields
    ------
    An infinite sequence of (field_lon, lcen, bcen, rad)
    """
    while True:
        field_lon = self._random_field()

        if samples_per_field is None:
            batch_size = 30000 if running_on_cloud() else 500
        else:
            batch_size = samples_per_field

        field = get_field(field_lon)
        for stamp in field.random_stamps(batch_size):
            yield stamp
import csv
import json
import os
import pickle
from os.path import join as path_join
from os.path import isfile
import joblib
import numpy as np
from sklearn.preprocessing import LabelEncoder
from scipy.io import mmread
from sklearn.metrics import mean_absolute_error
from sklearn.base import clone
from scipy.sparse import coo_matrix, hstack, vstack

# Detect whether the code is running on the cloud. This is deliberately
# best-effort: if `cloud` cannot be imported, or the check itself raises,
# the module falls back to on_cloud = False.
try:
    import cloud
    on_cloud = cloud.running_on_cloud()
except Exception as e:
    on_cloud = False

# Disabled helper, kept for reference. It is written with Python 2 syntax
# (`csv_file.next()`, `print header`).
#def read_column(filename, column_name):
    #"""returns generator with values in column_name in filename"""
    #csv_file = csv.reader(open(filename, 'r'))
    #header = csv_file.next()
    #print header
    #if column_name not in header:
        #raise Exception("Column '%s' is not in header! Header: %s" % (column_name, ",".join(header)))
    #column_index = header.index(column_name)
    #for line in csv_file:
        #yield line[column_index]
def data_path():
    """Return the directory that holds the data files

    Returns
    -------
    ``"/bucket/"`` when ``cloud.running_on_cloud()`` is true; otherwise
    ``../../data/`` joined onto the directory containing this module.
    """
    if not cloud.running_on_cloud():
        module_dir = os.path.dirname(__file__)
        return os.path.join(module_dir, "../../data/")
    return "/bucket/"
import os
#import PyWiseRF
from sklearn.ensemble import RandomForestClassifier
import cloud
import numpy as np

# On the cloud, set WISERF_ROOT to the WiseRF install directory.
if cloud.running_on_cloud():
    os.environ['WISERF_ROOT'] = '/home/picloud/WiseRF-1.5.9-linux-x86_64-rc2'


def test():
    """Fit a WiseRF on a random 5x5 sample and return the fitted model"""
    features = np.random.random((5, 5))
    labels = np.array([1, 1, 1, 0, 0])
    return WiseRF().fit(features, labels)


# PyWiseRF-backed variant, kept commented out for reference:
#class WiseRF(PyWiseRF.WiseRF):
#    def decision_function(self, x):
#        p = self.predict_proba(x)
#        return p[:, 1] - p[:, 0]


class WiseRF(RandomForestClassifier):
    """RandomForestClassifier with an added ``decision_function``"""

    def decision_function(self, x):
        """Return column 1 minus column 0 of ``predict_proba(x)``"""
        proba = self.predict_proba(x)
        first_col, second_col = proba[:, 0], proba[:, 1]
        return second_col - first_col
import os
import PyWiseRF
import cloud
import numpy as np

# On the cloud, set WISERF_ROOT to the WiseRF install directory.
if cloud.running_on_cloud():
    os.environ['WISERF_ROOT'] = '/home/picloud/WiseRF-1.5.9-linux-x86_64-rc2'


def test():
    """Fit a WiseRF on a random 5x5 sample and return the fitted model"""
    sample = np.random.random((5, 5))
    target = np.array([1, 1, 1, 0, 0])
    model = WiseRF()
    return model.fit(sample, target)


class WiseRF(PyWiseRF.WiseRF):
    """PyWiseRF.WiseRF with an added ``decision_function``"""

    def decision_function(self, x):
        """Return column 1 minus column 0 of ``predict_proba(x)``"""
        probabilities = self.predict_proba(x)
        col0 = probabilities[:, 0]
        col1 = probabilities[:, 1]
        return col1 - col0