def get_data_for_float(self, dac_url, only_file=None, surface_values_only=False):
    '''Given a dac_url return a list of hashes of data for each profile for the
    float specified in dac_url. Example dac_url for float 1900722:
    http://tds0.ifremer.fr/thredds/catalog/CORIOLIS-ARGO-GDAC-OBSaoml/1900722/profiles/catalog.xml
    '''
    pd = []
    for profile_url in sorted(self.get_profile_opendap_urls(dac_url)):
        if only_file:
            if not profile_url.endswith(only_file):
                continue
        float = profile_url.split('/')[7]
        prof = str(profile_url.split('/')[-1].split('.')[0].split('_')[1])
        self.logger.info('Reading data from ' + profile_url[:20] + '...' + profile_url[-50:])
        try:
            d = self.get_profile_data(profile_url, surface_values_only=surface_values_only)
            pd.append({float: {prof: d}})
        except RequiredVariableNotPresent as e:
            self.logger.warn(e)
        except OpenDAPServerError as e:
            self.logger.warn(e)

    return pd
def get_values(lar):
    st = []
    pd = []
    lar = str(lar)
    for part in lar.split(' '):
        if "kernel_size" in part[0:12]:
            ker = part[-2]
        if "MaxPool2d" in part:
            ker = part[-2]
        if 'AdaptiveAvgPool2d' in part:
            ker = [i[-2] for i in part.split(' ') if 'output_size' in i][0]
        s = 1
        if 'stride' in part[0:7]:
            s = part[-2]
        if 'padding' in part:
            pd.append(int(part[-2]))
        else:
            pd.append(0)
    p = max(pd)
    return int(ker), int(s), int(p)
def calc_embs(filepaths, batch_size=64):
    pd = []
    for start in tqdm(range(0, len(filepaths), batch_size)):
        aligned_images = load_and_align_images(filepaths[start:start + batch_size])
        pd.append(nn4_small2.predict_on_batch(np.squeeze(aligned_images)))
    embs = np.array(pd)
    return np.array(embs)
def calc_embs(file_paths, batch_size=64):
    pd = []
    for start in tqdm(range(0, len(file_paths), batch_size)):
        aligned_images = load_and_align_images(file_paths[start:start + batch_size])
        pd.append(model.predict_on_batch(np.squeeze(aligned_images)))
    embs = l2_normalize(np.concatenate(pd))
    # embs = np.array(pd)
    return np.array(embs)
def ProbDis(vector):
    # Define probability distribution
    pd = []
    m, n = vector.shape
    for i in range(m):
        v = 0
        for j in range(n):
            v = v + (vector[i, j]**2)
        pd.append(v / n)
    return pd
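# Hedged usage sketch (added for illustration, not from the original source):
# ProbDis returns, for each row of a 2-D array, the mean of the squared entries,
# so a 2x3 input yields a two-element list.
import numpy as np

v = np.array([[1.0, 2.0, 2.0],
              [0.0, 3.0, 3.0]])
print(ProbDis(v))  # [3.0, 6.0]  -> (1+4+4)/3 and (0+9+9)/3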
def calc_embs(filepaths, margin=10, batch_size=10):
    pd = []
    for start in tqdm(range(0, len(filepaths), batch_size)):
        aligned_images = prewhiten(
            load_and_align_images(filepaths[start:start + batch_size], margin))
        pd.append(model.predict_on_batch(aligned_images))
    embs = l2_normalize(np.concatenate(pd))
    return embs
def calc_emb_test(faces):
    pd = []
    aligned_faces = align_faces(faces)
    if len(faces) == 1:
        pd.append(nn4_small2.predict_on_batch(aligned_faces))
    elif len(faces) > 1:
        pd.append(nn4_small2.predict_on_batch(np.squeeze(aligned_faces)))
    embs = np.array(pd)
    return np.array(embs)
def step_by_step_static_path_density(self, ende=None):
    """ Returns list. [index=Aggregation depth: static path density] """
    pd = []
    for i, Cn in enumerate(self.step_by_step_aggregation(ende)):
        print('Static path density. Step ', i)
        pd.append(self.path_density_of_A(Cn))
    return pd
def calc_emb_test(faces):
    pd = []
    aligned_faces = align_faces(faces)
    if len(faces) == 1:
        pd.append(model.predict_on_batch(aligned_faces))
    elif len(faces) > 1:
        pd.append(model.predict_on_batch(np.squeeze(aligned_faces)))
    embs = l2_normalize(np.concatenate(pd))
    # embs = np.array(pd)
    return np.array(embs)
def find_closest_nodes(matrix):
    nodes = list(matrix.columns)
    pd = []
    # find minimal distance in matrix
    for n1 in nodes:
        for n2 in nodes:
            if n1 == n2:
                continue
            pd.append((matrix[n1][n2], n1, n2))
    pd.sort()
    return pd[0]
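# Hedged usage sketch (assumed setup, not from the original source): the function
# expects a square distance matrix whose rows and columns are node labels, e.g. a
# pandas DataFrame, and returns the (distance, node, node) tuple with the smallest
# distance.
import pandas as pd

labels = ['a', 'b', 'c']
dist = pd.DataFrame([[0.0, 2.0, 5.0],
                     [2.0, 0.0, 1.0],
                     [5.0, 1.0, 0.0]], index=labels, columns=labels)
print(find_closest_nodes(dist))  # (1.0, 'b', 'c')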
def calc_emb_test(faces):
    pd = []
    aligned_faces = align_faces(faces)
    if len(faces) == 1:
        pd.append(nn4_small2.predict_on_batch(aligned_faces))
    elif len(faces) > 1:
        pd.append(nn4_small2.predict_on_batch(np.squeeze(aligned_faces)))
    # embs = l2_normalize(np.concatenate(pd))
    embs = np.array(pd)
    # print(embs)
    return np.array(embs)
def calc_embs(filepaths, margin=10, batch_size=1):
    aligned_images = prewhiten(load_and_align_images(filepaths, margin))
    pd = []
    current = 0
    for start in range(0, len(aligned_images), batch_size):
        total = len(aligned_images)
        if current % 5 == 0:
            print('{} / {}'.format(current, total))
        pd.append(
            model.predict_on_batch(aligned_images[start:start + batch_size]))
        current += 1
    embs = l2_normalize(np.concatenate(pd))
    return embs
def arima(df, time_id, lookback, p, d, q):
    Log(LOG_INFO) << "Computing arima(%d,%d,%d) with lookback: %d " % (p, d, q, lookback)
    pd = []
    for tid in time_id:
        # pdb.set_trace()
        series = np.log(df[OPEN_KEY][tid - lookback:tid].values)
        model = ARIMA(series, order=(p, d, q))
        model_fit = model.fit(method_kwargs={"warn_convergence": False})
        output = model_fit.forecast()
        p0 = np.log(df[OPEN_KEY][tid])
        err = (output[0] - p0) / p0
        pd.append(err)
    pd = np.array(pd)
    return pd.reshape(-1, 1)
def s(self, q, pars=None):
    if not hasattr(self, '_atomic_formfactors'):
        self._atomic_formfactors = formFactor(q, self.Z)

    if pars is None:
        pars = self.par0
    else:
        # print(pars)
        # print(self.par0.keys())
        assert all([key in pars.keys() for key in self.par0.keys()]), \
            'the input parameter dict does not contain all necessary parameter keys'

    if self.reparameterized:
        pars = self.convert(pars)

    if not self.dispersed:
        self.transform(pars)
        return Debye(q, self, f=self._atomic_formfactors)
    else:
        pd = []
        wd = []
        for t in self._associated_transformation:
            if t.dw:
                _p, _w = t.dw.disperse(pars, t.name)
            else:
                _p, _w = pars[t.name], 1
            pd.append(_p)
            wd.append(_w)

        pd_grid = [i.ravel() for i in np.meshgrid(*pd)]
        wd_grid = [i.ravel() for i in np.meshgrid(*wd)]
        n = len(pd_grid[0])  # number of combinations
        # _bla = 0
        _s = np.zeros(q.shape)
        for i in range(n):
            _p_dict = {}
            _w = 1
            for j, key in enumerate(self._t_keys):
                _p_dict[key] = pd_grid[j][i]
                _w *= wd_grid[j][i]
            self.transform(_p_dict)
            _s += _w * Debye(q, self, f=self._atomic_formfactors)
        return _s
def test_compare_with_closed_form(self):
    """Test that compares the computed with the analytical CRPS."""
    pd_single = norm(0, 1)
    pd = []
    for i in range(0, 3):
        pd.append(pd_single)
    meas = [-1, 0, 1]
    mean_crps, single_crps = crps(pd, meas)

    def crps_closed_form(pd, meas):
        return meas * (2 * pd.cdf(meas) - 1) + 2 * pd.pdf(meas) - 1 / np.sqrt(np.pi)

    crps_analytical = list(map(crps_closed_form, pd, meas))
    is_good = np.isclose(np.array(single_crps), np.array(crps_analytical)).all()
    assert_true(is_good, msg="Computed CRPS is not equal to analytical CRPS.")
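# Note added for clarity (matches crps_closed_form above): for a standard normal
# forecast N(0, 1) and an observation y, the CRPS has the closed form
#     CRPS(N(0, 1), y) = y * (2*Phi(y) - 1) + 2*phi(y) - 1/sqrt(pi)
# where Phi and phi are the standard normal CDF and PDF; the test compares the
# numerically computed values against exactly this expression.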
def test_compare_different_expectations(self):
    """Test that compares same distance between meas and pd."""
    pd_single = norm(0, 1)
    pd = []
    for i in range(0, 3):
        pd.append(pd_single)
    meas = [-1, 0, 1]
    mean_crps1, single_crps1 = crps(pd, meas)

    pd2 = []
    for i in range(0, 3):
        pd2.append(norm(i, 1))
    meas2 = [-1, 1, 3]
    mean_crps2, single_crps2 = crps(pd2, meas2)

    is_good = np.equal(single_crps1, single_crps2).all()
    assert_true(is_good, msg="Relation of individual CRPS values should return the same value.")
def test_compare_different_expectations(self):
    """Test that compares same distance between meas and pd."""
    pd_single = norm(0, 1)
    pd = []
    for i in range(0, 3):
        pd.append(pd_single)
    meas = [-1, 0, 1]
    meanCRIGN1, singleCRIGN1 = crign.crign(pd, meas)

    pd2 = []
    for i in range(0, 3):
        pd2.append(norm(i, 1))
    meas2 = [-1, 1, 3]
    meanCRIGN2, singleCRIGN2 = crign.crign(pd2, meas2)

    is_good = np.isclose(singleCRIGN1, singleCRIGN2).all()
    assert_true(
        is_good,
        msg="Relation of individual CRIGN values should return roughly the same value.")
def getClusterCenters(data, k):
    rows, cols = data.shape
    centers = np.zeros((k, cols))
    r = np.random.choice(rows, 1)  # Initial choice
    centers[0, :] = data[r, :]
    k_center = 1
    while k_center < k:
        pd = []  # pd represents probability distribution
        dist_sum = 0
        for i in range(rows):
            max_dist = 1000
            for j in range(k_center):
                dist = np.linalg.norm(data[i] - centers[j])**2
                if dist < max_dist:
                    max_dist = dist
            d = max_dist * max_dist
            dist_sum += d
            pd.append(d)
        for i in range(rows):
            pd[i] = pd[i] / dist_sum
        r = np.random.choice(rows, 1, p=pd)
        centers[k_center, :] = data[r, :]
        k_center = k_center + 1
    return centers
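# Hedged usage sketch (assumed data, not from the original source): a k-means++-style
# seeding call on random 2-D points; each new center is sampled with probability
# proportional to its (squared) distance from the nearest existing center.
import numpy as np

data = np.random.rand(200, 2)
centers = getClusterCenters(data, k=3)
print(centers.shape)  # (3, 2)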
def map_to_chart(candle, pd, stock):
    temp = DailyData.chart(candle['date'], stock, candle['open'], candle['high'],
                           candle['low'], candle['close'], candle['volume'],
                           "NOT AVAILABLE", "NOT AVAILABLE", "NOT AVAILABLE")
    pd = pd.append(
        {
            'date': temp.get_date(),
            'symbol': stock,
            'open': temp.get_open(),
            'high': temp.get_high(),
            'low': temp.get_low(),
            'close': temp.get_close(),
            'volume': temp.get_volume(),
            'change': temp.get_change(),
            'changePercent': temp.get_changePercent(),
            'vwap': temp.get_vwap()
        },
        ignore_index=True)
    return pd
def get_pm25_df(html_path):
    with open(html_path, encoding="utf-8") as html_file:
        soup = BeautifulSoup(html_file, "html.parser")

    pm25_df = pd.DataFrame(columns=["hour", "pm25_value_max", "pm25_value_min"])
    container_tag = soup.find_all("div", {"class": "whitebody"})[0]
    parent_tag = container_tag.center.div.find("div", {
        "class": "forecast-body"
    }).find("div", {
        "class": "forecast-body-table"
    }).table
    list_of_pm25 = parent_tag.find("tr", {"class": "wf-row-pm25"}).find_all("td")[1:]
    hour = 0
    for each_point in list_of_pm25:
        try:
            this_hour = hour
            max_pm25 = int(
                each_point.div.find("div", {"class": "wf-cell-aqi-val-max"}).text)
            min_pm25 = int(
                each_point.div.find("div", {"class": "wf-cell-aqi-val-min"}).text)
            pm25_df = pm25_df.append(
                {
                    "hour": this_hour,
                    "pm25_value_max": max_pm25,
                    "pm25_value_min": min_pm25
                },
                ignore_index=True)
            hour += 3
            hour = hour % 24
        except:
            print("this one does not contain info")
    return pm25_df
def condensematrix(dm, pd, names, key, hrf='canonical', op='mult'):
    # returns condition with probe removed
    import copy as cp
    import numpy as np

    delays = None
    if hrf == 'fir':
        delays = []
        for i in dm.names:
            if i == 'constant':
                delays.append('-1')
            else:
                delays.append(i.split('_')[i.split('_').index('delay') + 1])
        delays = np.array(delays, dtype=int)

    if op == 'stack':
        for i in dm.names:
            if (i != 'constant'):
                if (i.split('_')[i.split('_').index(key) + 1] != '0'):
                    pd.append(dm.matrix[:, dm.names.index(i)])
                    names.append(i.replace('glm_label_', ''))
    else:
        idx = []
        for i in dm.names:
            if i == 'constant':
                idx.append('0')
            else:
                idx.append(i.split('_')[i.split('_').index(key) + 1])
        idx = np.array(idx, dtype=float)
        if delays is not None:
            for d in np.arange(np.max(delays) + 1):
                outkey = key + '_delay_' + str(d)
                outidx = idx[delays == d]
                pd.append(np.dot(dm.matrix[:, delays == d], outidx))
                names.append(outkey)
        else:
            pd.append(np.dot(dm.matrix, idx))
            names.append(key)
def condensematrix(dm, pd, names, key, hrf='canonical', op='mult'):
    # returns condition with probe removed
    import copy as cp
    import numpy as np

    delays = None
    if hrf == 'fir':
        delays = []
        for i in dm.names:
            if i == 'constant':
                delays.append('-1')
            else:
                delays.append(i.split('_')[i.split('_').index('delay') + 1])
        delays = np.array(delays, dtype=int)

    if op == 'stack':
        for i in dm.names:
            if (i != 'constant'):
                if (i.split('_')[i.split('_').index(key) + 1] != '0'):
                    pd.append(dm.matrix[:, dm.names.index(i)])
                    names.append(i.replace('glm_label_', ''))
    else:
        idx = []
        for i in dm.names:
            if i == 'constant':
                idx.append('0')
            else:
                idx.append(i.split('_')[i.split('_').index(key) + 1])
        idx = np.array(idx, dtype=float)
        if delays is not None:
            for d in np.arange(np.max(delays) + 1):
                outkey = key + '_delay_' + str(d)
                outidx = idx[delays == d]
                pd.append(np.dot(dm.matrix[:, delays == d], outidx))
                names.append(outkey)
        else:
            pd.append(np.dot(dm.matrix, idx))
            names.append(key)
type(df.ix[['a','b'], 'Two'])
type(df.ix[['a','b'],:])
df.ix[['a','b'],:]
df.ix[['a','b'],]
df.plot()
matplotlib.show()
matplotlib.Show()
import matplotlib as mpl
df.show()
df1 = pd.DataFrame(np.random.randn(6,3), columns=['A','B','C'])
df2 = pd.DataFrame(np.random.randn(6,3), columns=['D','E','F'])
df3 = df1.copy()
df1
df = pd.concat([df1, df2])
df
df = pd.append([df1, df2])
help(pd.concat)
df = pd.append([df1, df2], join='Inner')
df = pd.concat([df1, df2], join='Inner')
df = pd.concat([df1, df2], join=inner)
df = pd.concat([df1, df2], join='inner')
df
df = pd.concat([df1, df2], join_axes='inner')
df = df1.append(df2)
df
df = pd.concat([df1, df2], axes=1)
df = df1.append(df2, axis=1)
df = df1.append(df2, ignore_index=1)
df
df = df1.append(df2, ignore_index=0)
df
baseUrl = 'https://movie.douban.com/top250?start='
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.56'
}
movieDataList = []
for j in range(0, 2):
    star = j * 25
    url = baseUrl + str(star)
    req = Request(url=url, headers=headers, method="GET")
    response = urlopen(req).read().decode("utf-8")
    soup = BeautifulSoup(response, 'html.parser')
    movielist = soup.find_all("div", class_="item")
    for i in range(0, 25):
        moviename = re.search(r'<span class="title">(.*?)</span>', str(movielist[i]))
        movevedio = movielist[i].select(".info > .hd > a")[0]
        vedioname = re.search(r'<a class="" href="(.*?)">', str(movevedio))
        movieimg = movielist[i].select(".pic>a>img")
        img = re.search('src="(.*?)"', str(movieimg))
        movedaoyan = movielist[i].select(".info>.bd>p")[0]
        daoyan = re.search('<p class="">([\s\S]*?)</p>', str(movedaoyan))
        jianjie = daoyan.group(1).strip().replace(" ", '').replace("\n", '').replace("<br/>", '').replace('/', '')
        moviecomment = movielist[i].select(".star>.rating_num")[0].get_text()
        jieshao = movielist[i].select('.quote>.inq')[0].get_text()
        movedata = (moviename.group(1), vedioname.group(1), img.group(1), jianjie, moviecomment, jieshao)
        # print(movedata)
        movieDataList.append(movedata)
print(movieDataList)
print(len(movieDataList))
prob_df = pd.DataFrame()
prob_df['label'] = y_test
prob_df['prob_down'] = prob_down

global quantile
quantile = np.percentile(prob_down, list(range(0, 101, 10)))
prob_df['group'] = prob_df['prob_down'].apply(group_prob)

# Use distinct names for the per-group lists so the pandas alias `pd` is not
# shadowed (the original reused `pd`, which would break pd.read_csv below).
pred_down = []
true_down = []
for i in range(1, 11):
    group_df = prob_df[prob_df['group'] == i]
    true_down_prob = stats.itemfreq(group_df['label'])[0][1] / group_df.shape[0]
    print('group:', i, 'predict_prob:', group_df['prob_down'].median(), 'true_prob:', true_down_prob)
    pred_down.append(group_df['prob_down'].median())
    true_down.append(true_down_prob)
print('mse', mean_squared_error(pred_down, true_down))

############ Lasso model ###############
from sklearn import linear_model
from sklearn.metrics import r2_score

df = pd.read_csv('/Users/vikasnatesh/Downloads/labeled_data_10s.csv')

# Train Test split (80% train, 20% test)
def make_parammat(dm, hrf='canonical', zscore=False):
    # remove anything with a 0 and include probe as a feature
    # assuming dm is a dict
    import numpy as np
    out = dm[dm.keys()[0]]
    pd = []
    names = []
    for key in dm.keys():
        if key == 'motion':
            names.append('motion_0')
            pd.append(np.dot(dm[key].matrix, np.array([1, 0, 0, 0, 0, 0, 0])))
            names.append('motion_1')
            pd.append(np.dot(dm[key].matrix, np.array([0, 1, 0, 0, 0, 0, 0])))
            names.append('motion_2')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 1, 0, 0, 0, 0])))
            names.append('motion_3')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 1, 0, 0, 0])))
            names.append('motion_4')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 0, 1, 0, 0])))
            names.append('motion_5')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 0, 0, 1, 0])))
        # hardcode stim and verb
        elif key == 'stim' or key == 'verb' or key == 'anim':
            condensematrix(dm[key], pd, names, key, hrf, op='stack')
        else:
            condensematrix(dm[key], pd, names, key, hrf, op='mult')
    # don't need constant because normalized data
    # pd.append(np.ones(np.shape(pd[-1])))
    # names.append('constant')
    if zscore == True:
        out.matrix = zs(np.array(pd).T)
    else:
        out.matrix = (np.array(pd).T)
    out.names = names
    return out
# winter
winter = joindf[(joindf.index.month == 12) | (joindf.index.month == 1) | (joindf.index.month == 2)]
spring = joindf[(joindf.index.month > 2) & (joindf.index.month < 6)]
summer = joindf[(joindf.index.month > 5) & (joindf.index.month < 9)]
autumn = joindf[(joindf.index.month > 8) & (joindf.index.month < 12)]

correlation = {
    'annual': joindf.corr().iloc[0, 1],
    'winter': winter.corr().iloc[0, 1],
    'spring': spring.corr().iloc[0, 1],
    'summer': summer.corr().iloc[0, 1],
    'autumn': autumn.corr().iloc[0, 1]
}

# works with xarray but not with pandas...
nstorm_monthly_mean = nstorms.groupby(by=nstorms.index.month).mean()
for i in np.arange(1, 13, 1):
    df = nstorms[nstorms.index.month == i] - nstorm_monthly_mean.year[i]
    df.columns = ['norm']
    # pd.append does not exist as a module-level function; pd.concat is the intended call
    result = pd.concat([nstorms, df], axis=1, join='outer')

nstorms_norm = nstorms_norm.reset_index(drop=True)
indices_crop = indices_crop.reset_index(drop=True)
correlation = indices_crop.corr(nstorms_norm.values, method='pearson')

plt.figure()
def make_parammat(dm, hrf='canonical', zscore=False):
    # remove anything with a 0 and include probe as a feature
    # assuming dm is a dict
    import numpy as np
    out = dm[dm.keys()[0]]
    pd = []
    names = []
    for key in dm.keys():
        if key == 'motion':
            names.append('motion_0')
            pd.append(np.dot(dm[key].matrix, np.array([1, 0, 0, 0, 0, 0, 0])))
            names.append('motion_1')
            pd.append(np.dot(dm[key].matrix, np.array([0, 1, 0, 0, 0, 0, 0])))
            names.append('motion_2')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 1, 0, 0, 0, 0])))
            names.append('motion_3')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 1, 0, 0, 0])))
            names.append('motion_4')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 0, 1, 0, 0])))
            names.append('motion_5')
            pd.append(np.dot(dm[key].matrix, np.array([0, 0, 0, 0, 0, 1, 0])))
        # hardcode stim and verb
        elif key == 'stim' or key == 'verb' or key == 'anim':
            condensematrix(dm[key], pd, names, key, hrf, op='stack')
        else:
            condensematrix(dm[key], pd, names, key, hrf, op='mult')
    # don't need constant because normalized data
    # pd.append(np.ones(np.shape(pd[-1])))
    # names.append('constant')
    if zscore == True:
        out.matrix = zs(np.array(pd).T)
    else:
        out.matrix = (np.array(pd).T)
    out.names = names
    return out
from scraper import *
import pandas as pd

link = ['https://www.ah.nl/producten/product/wi48405/ah-basic-havermout',
        'https://www.ah.nl/producten/product/wi383520/ah-amandel-drink-ongezoet']
name = ['oats', 'almond milk']
portion = [60, 200]

# Collect the nutrient data for each product and combine into a single frame
# (pd.append does not exist as a module-level function; pd.concat is used here,
# assuming get_nutrients returns a DataFrame).
frames = [get_nutrients(link[idx], name[idx]) for idx in range(len(link))]
foods = pd.concat(frames, ignore_index=True)
foods.to_csv('foods.csv')

# meal 1 oats
# meal 2 chicken and veg
# introduce class mealplan, subclasses ingredients/food, contains targets, ingredients, stock
import gc

import pandas as pd


def fix_time(df, time_cols):
    for time_col in time_cols:
        df[time_col] = pd.to_datetime(df[time_col], errors='coerce', format='%Y%m%d')
    return df


gc.enable()

df_train = pd.read_csv('~/train.csv')
# pd.append does not exist as a module-level function; pd.concat combines the two files.
df_train = pd.concat((df_train, pd.read_csv('~/train_v2.csv')),
                     axis=0, ignore_index=True).reset_index(drop=True)
df_test = pd.read_csv('~/sample_submission_v2.csv')
df_members = pd.read_csv('~/members_v3.csv')

df_transactions = pd.read_csv('~/transactions.csv')
df_transactions = pd.concat(
    (df_transactions, pd.read_csv('~/transactions_v2.csv')),
    axis=0, ignore_index=True).reset_index(drop=True)
df_transactions = df_transactions.sort_values(
    by=['transaction_date'], ascending=[False]).reset_index(drop=True)
df_transactions = df_transactions.drop_duplicates(subset=['msno'], keep='first')
import csv
import pandas as pd

file1 = "C:/Users/Krithi/Desktop/Python/bright_star.csv"
file2 = "C:/Users/Krithi/Desktop/Python/convertedStars.csv"

d1 = []
d2 = []
with open(file1, "r", encoding="utf8") as f:
    csvreader = csv.reader(f)
    for i in csvreader:
        d1.append(i)
with open(file2, "r", encoding="utf8") as f:
    csvreader = csv.reader(f)
    for i in csvreader:
        d2.append(i)

h1 = d1[0]
h2 = d2[0]
pd1 = d1[1:]
pd2 = d2[1:]
h = h1 + h2

# Note: this list reuses the name `pd`, which shadows the pandas import above;
# the commented-out pd.read_csv at the bottom would fail if uncommented.
pd = []
for i in pd1:
    pd.append(i)
for j in pd2:
    pd.append(j)

with open("totalStars.csv", "w", encoding='utf8') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(h)
    csvwriter.writerows(pd)

# df = pd.read_csv("totalStars.csv")
# df.tail(8)
for i in range(1, number_of_steps):
    if label == 'mlp' or label == 'cnn' or label == 'lstm':
        model.set_weights(init_weights)
    if i * data_batch > X_train.shape[0] - 1:
        # if label == 'mlp' or label == 'cnn' or label == 'lstm':
        history = model.fit(X_train[:X_train.shape[0] - 1, :],
                            y_train[:y_train.shape[0] - 1],
                            epochs=100,
                            validation_data=(X_test, y_test),
                            callbacks=[early_stopping],
                            workers=8)
        # else:
        #     history = model.fit(X_train[:X_train.shape[0] - 1, :], y_train[:y_train.shape[0] - 1])
        #     validation_data=(X_test, y_test), workers=8)
        predictions['size_' + str(i * data_batch)] = model.predict(X_test)
        temp = pd.DataFrame(history.history)
        # pd.append does not exist as a module-level function; append the last row to stats instead.
        stats = stats.append(temp.iloc[temp.shape[0] - 1, :])
        break
    # if label == 'mlp' or label == 'cnn' or label == 'lstm':
    history = model.fit(X_train[:i * data_batch],
                        y_train[:i * data_batch],
                        epochs=100,
                        validation_data=(X_test, y_test),
                        callbacks=[early_stopping],
                        workers=8)
    # else:
    #     history = model.fit(X_train[:i * data_batch], y_train[:i * data_batch])
    #     validation_data=(X_test, y_test), workers=8)
    predictions['size_' + str(i)] = np.argmax(model.predict(X_test))  # , axis=1)
    temp = pd.DataFrame(history.history)
    stats = stats.append(temp.iloc[temp.shape[0] - 1, :])

# Log of models data