Exemple #1
0
def main(cat, cat_name):
    """Train regressors on several environment-proxy sets for one catalog
    and dump clustering statistics (rwp, xcf, mcf, conformity) per set.

    Parameters:
        cat: catalog dict with at least 'dat', 'box_size', 'red_cut'.
        cat_name: catalog name used to locate proxy data files and the
            logging directory.
    """
    data = cat['dat']
    # Radial scale: presumably bin centers r and edges rbins over
    # [0.1, 20] in 25 steps -- see calc.make_r_scale for exact semantics.
    r, rbins = c.make_r_scale(.1, 20, 25)
    # One 'c<r>' pair-count proxy name per radial bin.
    pair_proxies = ['c%.2f' % _ for _ in r]
    names = ['rhillmass', 'dm5e12', 's5', 'd1', 'Pall']
    proxy_list = [['rhillmass'], ['d5e12', 'm5e12'], ['s5'], ['d1'], pair_proxies]
    predicted_ssfrs = []

    # Train one regressor per proxy set; keep only the predicted column.
    for proxies, name in zip(proxy_list, names):
        data = util.load_proxies(data, 'data/' + cat_name + '/', proxies, proxies)
        features = proxies + ['mstar']
        dtrain, dtest, regressor = model.trainRegressor(data, features)
        predicted_ssfrs.append(dtest['pred'])

    log_dir = util.get_logging_dir(cat_name)
    # NOTE(review): `dtest` below is the test split from the LAST loop
    # iteration only; predictions from earlier proxy sets are written onto
    # it. This is only correct if every trainRegressor call produces the
    # same train/test split -- confirm.
    for proxies, pred, name in zip(proxy_list, predicted_ssfrs, names):
        dtest['pred'] = pred
        util.train_and_dump_rwp(data, proxies + ['mstar'], name + '.dat', '',
                                cat['box_size'], cat['red_cut'], logging=False)
        util.train_and_dump_rwp_bins(data, proxies + ['mstar'], name + '.dat',
                                     '', cat['box_size'], num_splits=1,
                                     red_cut=cat['red_cut'], logging=False)
        xcf = c.cross_correlation_function(dtest, cat['red_cut'],
                                           box_size=cat['box_size'])
        util.dump_data(xcf, name + '_xcf.dat', log_dir)
        mcf = c.jackknife_mcf(dtest, box_size=cat['box_size'])
        util.dump_data(mcf, name + '_mcf.dat', log_dir)

        # Stellar-mass bins for conformity; filenames carry the bin centers.
        mlims = [(9.9, 10.1), (10.1, 10.3), (10.5, 10.7)]  # change for illustris
        fnames = [''.join([name, '_conformity_', str(num), '.dat']) for num in [10.0, 10.2, 10.6]]

        for mlim, fname in zip(mlims, fnames):
            res = c.radial_conformity_wrapper(dtest, cat['box_size'], mlim[0], mlim[1])
            util.dump_data(res, fname, log_dir)
Exemple #2
0
def download_list():
    proxies = util.load_proxies()

    for i in range(1,802): #802               
        url = 'http://www.cnhm.net/plant/index/page/%d' % (i)
        lfile = '%scnhm/list_%d.html' % (config.local_dir,i) 

        proxy = random.choice(proxies).strip()
        print i,url,proxy

        respHtml = browser.downad_and_save(url,lfile)         
        soup = BeautifulSoup(respHtml, fromEncoding=htmlCharset)

        table = soup.find('div',attrs = {'class': 'main'})
        if not table:
            os.system('rm -f %s'%(lfile))
            print 'err'
            continue 

        xitems = []
        tr_list = table.findAll('li')
        for tr in tr_list:
            plant_name = tr.find('div',attrs = {'class': 'PlantName'})
            namea = plant_name.find('a')
            uid = util.completeInnerUrl(url,namea['href'])
            cn_name = namea['title']
            lating_name = tr.find('div',attrs = {'class': 'LatinName'})['title']            
            section = tr.find('div',attrs = {'class': 'section'}).contents[0]
            genera = tr.find('div',attrs = {'class': 'genera'}).contents[0]
            xitems.append(','.join([uid,cn_name,lating_name,section,genera ]))

        with open('%scnhm/xlist_%s.csv'%(config.local_dir,i),'w') as f:
            f.write('\r\n'.join(xitems)+'\r\n')
            f.close()
Exemple #3
0
def mtheater_all():
    proxies = load_proxies()
    for i in range(1,6):
        try:
            # proxy = random.choice(proxies).strip()
            download_mtheater_list(i)
        except Exception,e:
            print i,'err',e
Exemple #4
0
def download1():
    proxies = util.load_proxies()
    
    rows = db.select('plants',what="pk_id,name_cn,source_uri" ) # ,offset=0,limit=1
    for r in rows:
        proxy = random.choice(proxies).strip() 
        print r.pk_id,r.source_uri,proxy
        try:
            download_detail(r.source_uri,proxy)
        except Exception,ex:
            print r.pk_id,r.source_uri,proxy,'err',ex 
Exemple #5
0
def load_words():
    proxies = util.load_proxies()
    
    reader = csv.reader(file('%scnhm.csv'%(config.local_dir), 'rb'))
    for line in reader:
        proxy = random.choice(proxies).strip() 
        pid = line[0].strip() #int() 

        try:            
            print pid,line[1],proxy            
            download(line[0].strip(),line[1],proxy)
        except Exception,ex:
            print pid,'err',ex
Exemple #6
0
import model, calc, util
import plotting as p
from collections import defaultdict
import numpy as np

# Load the 'HW' catalog and attach its precomputed halo proxies, then
# train one regressor on rhillmass alone and one on the (d5e12, m5e12)
# pair. trainRegressor returns a 3-tuple; only the middle element (the
# test split carrying predictions) is kept here.
cat = util.get_catalog('HW')
dat = cat['dat']
halo_proxies = ['rhillmass', 'd5e12', 'm5e12']
dat = util.load_proxies(dat, 'data/HW/', halo_proxies, halo_proxies)
_, rhill_df, _ = model.trainRegressor(dat, cat['box_size'],
        halo_proxies[:1])
_, dm5e12_df, _ = model.trainRegressor(dat, cat['box_size'],
        halo_proxies[1:])

# Host-mass bin edges for the richness counts below (units presumably
# Msun -- confirm against the catalog convention).
mbins = [7e13, 1e14, 3e14]

def get_richness_count(df, mbins, color='ssfr', red_cut=-11):
    """Count red satellites per host, grouped into host-mass bins.

    Parameters:
        df: structured array / record-like table with 'upid', 'id',
            'mvir' and the `color` column. Hosts have upid == -1;
            satellites carry their host's id in 'upid'.
        mbins: host-mass bin edges (len(mbins) - 1 bins).
        color: column used to classify satellites as red/quenched.
        red_cut: satellites with df[color] < red_cut count as red.

    Returns:
        A list with one list per mass bin; each inner list holds the
        red-satellite count of every host falling in that bin.
    """
    hosts = df[df['upid'] == -1]
    # One bucket per mass bin (the original hard-coded two buckets and
    # then appended flat, never using the bin index).
    count_bins = [[] for _ in range(len(mbins) - 1)]
    for hid, hmass in zip(hosts['id'], hosts['mvir']):
        # Only hosts strictly inside the binned range; the upper edge is
        # excluded so np.digitize always yields a valid bin index.
        if not (mbins[0] <= hmass < mbins[-1]):
            continue
        subs = df[df['upid'] == hid]
        # BUGFIX: the original took len() of the boolean mask, which
        # counts ALL satellites; sum the mask to count only red ones.
        red_subs = int(np.sum(subs[color] < red_cut))
        # BUGFIX: digitize returns a scalar for scalar input -- the
        # original's [0] indexing raised; convert and shift to 0-based.
        mbin_idx = int(np.digitize(hmass, mbins)) - 1
        count_bins[mbin_idx].append(red_subs)
    return count_bins

# NOTE(review): both calls use rhill_df; the first (default 'ssfr'
# column) gives the measured richness, the second uses the regressor's
# 'pred' column -- confirm the first was not meant to use `dat` instead.
hw_richness = get_richness_count(rhill_df, mbins)
rhill_richness = get_richness_count(rhill_df, mbins, 'pred')
Exemple #7
0
from sklearn import preprocessing
from sklearn.gaussian_process import GaussianProcess
import util
import calc as c
from sklearn.linear_model import Lasso
import plotting as p


# Load the HW catalog and attach the full set of precomputed
# environment proxies.
cat = util.get_catalog("HW")
dat = cat['dat']

proxies = ['rhillmass', 'd1', 'd2', 'd5', 'd10', 's1', 's2', 's5', 's10',
           'd5e12', 'm5e12']
# TODO: make a log flag so that m5e12 gets log scaled as well
dat = util.load_proxies(dat, 'data/HW/', proxies, proxies)

# Feature matrix is every proxy plus stellar mass.
features = proxies + ['mstar']

d_train, d_test = util.split_test_train(dat)
# select_features with scaled=True returns (X, y, x_scaler, y_scaler)
# -- presumably; confirm against util.select_features.
Xtr, ytr, xtrsc, ytrsc = util.select_features(features, d_train, scaled=True)
Xts, yts, xtssc, ytssc = util.select_features(features, d_test, scaled=True)
#poly = preprocessing.PolynomialFeatures(degree=2)
#Xtr_new = poly.fit_transform(Xtr)
#Xts_new = poly.fit_transform(Xts)


# Fit a Gaussian process on the scaled training features and predict
# for the test split.
# NOTE(review): sklearn's GaussianProcess is the legacy (pre-0.18) API,
# since replaced by GaussianProcessRegressor -- confirm the pinned
# sklearn version before upgrading.
gp = GaussianProcess()
gp.fit(Xtr, ytr)
y_hat = gp.predict(Xts)
#clf = Lasso(alpha=0.2)
#clf.fit(Xtr_new, ytr)
Exemple #8
0
import calc as c
import util

# Catalogs to process and their matching data directories.
names = ['HW', 'Becker', 'Lu', 'Henriques', 'Illustris', 'EAGLE', 'MB-II']
data_dirs = [''.join(['data/', name, '/']) for name in names]

cats = [util.get_catalog(name) for name in names]

# For every catalog: attach the d5e12/m5e12 proxies, then train and dump
# binned projected-correlation (rwp) results under a shared filename.
for name, cat, ddir in zip(names, cats, data_dirs):
    df = cat['dat']
    print name
    print df.dtype.names
    proxies = ['d5e12', 'm5e12']
    proxyname = 'dm5e12'
    df = util.load_proxies(df, ddir, proxies, proxies)
    print df.dtype.names
    features = proxies + ['mstar']
    fname = 'dm5e12_all_2.dat'

    util.train_and_dump_rwp_bins(df, features, fname, proxyname, name,
                                 cat['box_size'], red_cut=cat['red_cut'],
                                 logging=True, num_splits=1)
Exemple #9
0
import calc as c
import util

cat_name = 'HW'
cat = util.get_catalog('HW')
dat = cat['dat']
proxy_names = [ 'rhm3e12', 'rhm4e12', 'rhm5e12', 'rhm6e12', 'rhm7e12',
               'rhm8e12', 'rhm9e12', 'rhm1e13', 'rhm2e13', 'rhm3e13']

names = [proxy + '.dat' for proxy in proxy_names]
all_features = [util.load_feature_list(proxy, dat, cat) for proxy in proxy_names]
for f_list in all_features:
    dat = util.load_proxies(dat, cat['dir'], f_list, f_list)
for features, name, proxy in zip(all_features, names, proxy_names):
    print "training on: ", features + ['mstar've Oak Avenue, Apt. 4]

    util.train_and_dump_rwp_bins(dat, features + ['mstar'], name, proxy, cat_name, cat['box_size'], num_splits=3, logging=True)
Exemple #10
0
from collections import defaultdict
import calc, util, model
import plotting as p
p.set_plotting_context()


# Catalogs to compare, each with the rhillmass proxy attached.
names = ['HW', 'Becker', 'Lu', 'Henriques', 'EAGLE', 'Illustris', 'MB-II']
catalogs = [util.get_catalog(name) for name in names]
data_dirs = ['data/' + name + '/' for name in names]
dfs = [cat['dat'] for cat in catalogs]
dfs = [util.load_proxies(df, ddir, ['rhillmass'], ['rhillmass']) for df, ddir
        in zip(dfs, data_dirs)]

# trainRegressor returns a 3-tuple; [1] keeps the test split with its
# 'pred' column for each catalog.
pred_dfs = [model.trainRegressor(df,['rhillmass', 'mstar'])[1] for df in dfs]
mlims = [(9.9, 10.1), (10.1, 10.3), (10.5, 10.7)]

# Slice each catalog's test set into three stellar-mass bins.
# NOTE(review): np is used below but not imported in this snippet --
# confirm `import numpy as np` exists elsewhere in the file.
msbin_catalog_dict = defaultdict(dict)
for mlim, desc in zip(mlims, ['lo', 'mid', 'hi']):
    for name, df in zip(names, pred_dfs):
        msmin, msmax = mlim # density match(df, mlim)
        sel = np.where((df['mstar'] > msmin) & (df['mstar'] < msmax))[0]
        msbin_catalog_dict[name][desc] = df[sel]

# One subplot per (catalog, mass bin).
# NOTE(review): plt is used but not imported here, `i` is never
# incremented, and the loop body appears truncated -- confirm against
# the full original script.
nrow = len(names)
ncol = 3
i = 1
for name in names:
    for mbin, df in msbin_catalog_dict[name].items():
        plt.subplot(nrow, ncol, i)
        rhillmass_values = df['rhillmass']
        med = np.median(rhillmass_values)
Exemple #11
0
import util
import calc
import model
import matplotlib.pyplot as plt


# Henriques catalog: attach the 's5' proxy, train a regressor on it,
# and compare predicted vs. measured ssfr with a hexbin plot.
data_dir = 'data/Henriques'
cat = util.get_catalog("Henriques")
dat = cat['dat']
dat = util.load_proxies(dat, data_dir, ['s5'], ['s5'])
df_train, df_test, _ = model.trainRegressor(dat, cat['box_size'], ['s5'])

plt.hexbin(df_test['ssfr'], df_test['pred'])