def send_to_db(self):
    conn = sqlite3.connect('data2.sqlite', timeout=30)
    c = conn.cursor()
    df = DataFrame(self.__dict__.items(), index=self.__dict__.keys())
    df = df.drop(0, axis=1)      # drop the key column; axis must be a keyword in pandas >= 2.0
    df = df.transpose()
    df = df.sort_index(axis=1)   # DataFrame.sort() was removed; sort the columns by name instead
    df.to_sql('earnings_calendar', conn, if_exists='append', index=False)
def testDataFrame(self):
    df = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"],
                   columns=["x", "y", "z"])

    # column indexed
    outp = DataFrame(ujson.decode(ujson.encode(df)))
    self.assertTrue((df == outp).values.all())
    assert_array_equal(df.columns, outp.columns)
    assert_array_equal(df.index, outp.index)

    outp = DataFrame(**ujson.decode(ujson.encode(df, orient="split")))
    self.assertTrue((df == outp).values.all())
    assert_array_equal(df.columns, outp.columns)
    assert_array_equal(df.index, outp.index)

    outp = DataFrame(ujson.decode(ujson.encode(df, orient="records")))
    outp.index = df.index
    self.assertTrue((df == outp).values.all())
    assert_array_equal(df.columns, outp.columns)

    outp = DataFrame(ujson.decode(ujson.encode(df, orient="values")))
    outp.index = df.index
    self.assertTrue((df.values == outp.values).all())

    outp = DataFrame(ujson.decode(ujson.encode(df, orient="index")))
    self.assertTrue((df.transpose() == outp).values.all())
    assert_array_equal(df.transpose().columns, outp.columns)
    assert_array_equal(df.transpose().index, outp.index)
def testDataFrame(self):
    df = DataFrame([[1, 2, 3], [4, 5, 6]], index=['a', 'b'],
                   columns=['x', 'y', 'z'])

    # column indexed
    outp = DataFrame(ujson.decode(ujson.encode(df)))
    self.assertTrue((df == outp).values.all())
    assert_array_equal(df.columns, outp.columns)
    assert_array_equal(df.index, outp.index)

    dec = _clean_dict(ujson.decode(ujson.encode(df, orient="split")))
    outp = DataFrame(**dec)
    self.assertTrue((df == outp).values.all())
    assert_array_equal(df.columns, outp.columns)
    assert_array_equal(df.index, outp.index)

    outp = DataFrame(ujson.decode(ujson.encode(df, orient="records")))
    outp.index = df.index
    self.assertTrue((df == outp).values.all())
    assert_array_equal(df.columns, outp.columns)

    outp = DataFrame(ujson.decode(ujson.encode(df, orient="values")))
    outp.index = df.index
    self.assertTrue((df.values == outp.values).all())

    outp = DataFrame(ujson.decode(ujson.encode(df, orient="index")))
    self.assertTrue((df.transpose() == outp).values.all())
    assert_array_equal(df.transpose().columns, outp.columns)
    assert_array_equal(df.transpose().index, outp.index)
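# Hedged sketch of the _clean_dict helper the test above relies on (an
# assumption, since its definition is not shown here): it presumably coerces
# the keys of the decoded "split" payload to str, because DataFrame(**dec)
# needs the keyword names ("data", "index", "columns") to be plain strings
# while the decoder may hand them back in another string-like type.
def _clean_dict(d):
    return {str(k): v for k, v in d.items()}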
def network_perf(systems, unique_id, group_number, detail_options,
                 rampup_value=0, current_dir=""):
    have_net_data = False
    sets = search_item(systems, unique_id, "network", r"(.*)", [], [])
    modes = ['bandwidth', 'requests_per_sec']
    for mode in sorted(modes):
        results = {}
        for system in sets:
            net = []
            series = []
            global_perf = 0.0
            for perf in sets[system]:
                if perf[1] == mode:
                    if not perf[1] in net:
                        net.append(perf[1])
                    global_perf = global_perf + float(perf[3])
            series.append(global_perf)
            results[system] = Series(series, index=net)

        df = DataFrame(results)
        details = []
        matched_category = []
        for net in df.transpose().columns:
            if have_net_data is False:
                print()
                print("Group %d : Checking network perf" % group_number)
                have_net_data = True
            consistent = []
            curious = []
            unstable = []
            # How much the variance could be far from the average (in %)
            tolerance_max = 15
            tolerance_min = 2
            print_perf(tolerance_min, tolerance_max, df.transpose()[net], df,
                       mode, net, consistent, curious, unstable, "",
                       rampup_value, current_dir)
            if mode == 'bandwidth':
                unit = "MB/sec"
            else:
                unit = "RRQ/sec"
            prepare_detail(detail_options, group_number, mode, net, details,
                           matched_category)
            print_summary("%-30s %s" % (mode, net), consistent, "consistent",
                          unit, df)
            print_summary("%-30s %s" % (mode, net), curious, "curious",
                          unit, df)
            print_summary("%-30s %s" % (mode, net), unstable, "unstable",
                          unit, df)
        print_detail(detail_options, details, df, matched_category)
def unMap(self, X, Y, ycat):
    newX = []
    newY = []
    for i in range(X.shape[0]):
        # x = self.cats[0].categories[X[i]]
        # y = ycat.categories[Y[i]]
        # x = "ISIN%d" % (X[i])
        y = "Stripe %d" % (Y[i])
        # newX.append(x)
        newY.append(y)
    dataFrame = DataFrame([X, newY])
    dataFrame.transpose().to_csv("output.csv", index=False,
                                 header=["ISIN", "Risk_Stripe"])
def from_json_to_dataframe():
    results = json.load(open('./networks/first_level_analysis.json', 'r'))
    df = DataFrame(results)
    df.to_csv("panels.csv")
    dft = df.transpose()
    dft.to_csv("panels_trans.csv")
    return df
def analyze_first_level_panels():
    results = {}
    for d in first_level_topic_list:
        print "\n*********DESCRIPTOR: " + first_level_topic_list[d] + "(" + str(d) + ")"
        G = build_panel_network_by_descriptor(d)
        print "\nDESCRIPTOR: " + first_level_topic_list[d] + "(" + str(d) + ")"
        print "Nodes:", G.number_of_nodes()
        print "Edges:", G.number_of_edges()
        res_clique = analize_cliques(G)
        res_degree = analize_degrees(G)
        res_weight = analize_edges(G)
        d_final = dict(res_clique)
        d_final.update(res_degree)
        d_final.update(res_weight)
        d_final['id'] = d
        d_final['avg_clustering'] = nx.average_clustering(G)
        results[first_level_topic_list[d]] = d_final
    print "Writing json..."
    json.dump(results,
              open('./networks/first_level_panels_analysis.json', 'w'),
              indent=2)
    print "Writing csvs..."
    df = DataFrame(results)
    df.to_csv('./networks/first_level_panels_analysis.csv')
    dfinv = df.transpose()
    dfinv.to_csv('./networks/first_level_panels_analysis_inv.csv')
def plot_phonemes(path):
    phoneme_embeddings = dict()
    for line in codecs.open(path, "r"):
        line = line.split(",")
        key = line[0][1:-1]
        emb = line[1:]
        emb[-1] = emb[-1][:-1]
        emb = np.array([float(e) for e in emb])
        phoneme_embeddings[key] = emb

    phoneme_embeddings = DataFrame(phoneme_embeddings,
                                   columns=phoneme_embeddings.keys())
    print(phoneme_embeddings.columns)

    m = TSNE()
    phoneme_embeddings_tsne = m.fit_transform(phoneme_embeddings.transpose())
    print(len(phoneme_embeddings_tsne))

    for p, emb in zip(phoneme_embeddings.columns, phoneme_embeddings_tsne):
        c = "black"
        if regex.search("^[aeiou3E][*]?$", p):
            c = "red"
            plt.annotate(p, (emb[0], emb[1]), color=c)
        if regex.search("^.*w~$", p):
            c = "blue"
            plt.annotate(p, (emb[0], emb[1]), color=c)
        if regex.search("^.*y~$", p):
            c = "yellow"
            plt.annotate(p, (emb[0], emb[1]), color=c)
        if regex.search("^.*h~$", p):
            c = "brown"
            plt.annotate(p, (emb[0], emb[1]), color=c)
        if regex.search("^.*\"$", p):
            c = "green"
            plt.annotate(p, (emb[0], emb[1]), color=c)
def diagnostic_table(self, num_years=5, base_calc=None):
    table = []
    row_years = []
    calc = copy.deepcopy(self)
    base_calc = copy.deepcopy(base_calc)
    for i in range(0, num_years):
        has_behavior = (calc.behavior.BE_sub or calc.behavior.BE_inc or
                        calc.behavior.BE_CG_per)
        if has_behavior:
            base_calc.calc_all()
            behavior_calc = behavior(base_calc, calc)
            behavior_calc.diagnostic_table_items(table)
        else:
            calc.calc_all()
            calc.diagnostic_table_items(table)
        row_years.append(calc.policy.current_year)
        if i < num_years - 1:
            calc.increment_year()
            if base_calc is not None:
                base_calc.increment_year()
    df = DataFrame(table, row_years,
                   ["Returns (#m)", "AGI ($b)", "Itemizers (#m)",
                    "Itemized Deduction ($b)",
                    "Standard Deduction Filers (#m)",
                    "Standard Deduction ($b)", "Personal Exemption ($b)",
                    "Taxable income ($b)", "Regular Tax ($b)",
                    "AMT income ($b)", "AMT amount ($b)", "AMT number (#m)",
                    "Tax before credits ($b)", "refundable credits ($b)",
                    "nonrefundable credits ($b)", "Misc. Surtax ($b)",
                    "Ind inc tax ($b)", "Payroll tax ($b)"])
    df = df.transpose()
    pd.options.display.float_format = '{:8,.1f}'.format
    return df
def test_dataframe(self, orient, numpy):
    if orient == "records" and numpy:
        pytest.skip("Not idiomatic pandas")

    df = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"],
                   columns=["x", "y", "z"])
    encode_kwargs = {} if orient is None else dict(orient=orient)
    decode_kwargs = {} if numpy is None else dict(numpy=numpy)

    output = ujson.decode(ujson.encode(df, **encode_kwargs), **decode_kwargs)

    # Ensure proper DataFrame initialization.
    if orient == "split":
        dec = _clean_dict(output)
        output = DataFrame(**dec)
    else:
        output = DataFrame(output)

    # Corrections to enable DataFrame comparison.
    if orient == "values":
        df.columns = [0, 1, 2]
        df.index = [0, 1]
    elif orient == "records":
        df.index = [0, 1]
    elif orient == "index":
        df = df.transpose()

    tm.assert_frame_equal(output, df, check_dtype=False)
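# A standalone illustration (an assumption for clarity, not part of the test
# above) of why orient="index" round-trips to the transpose: encoding by index
# produces {"a": {"x": 1, ...}, "b": {...}}, and DataFrame(dict-of-dicts)
# treats the *outer* keys as columns, so rows and columns end up swapped.
import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["x", "y"])
roundtrip = pd.DataFrame(df.to_dict(orient="index"))
assert roundtrip.equals(df.transpose())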
def diagnostic_table(self, num_years=5, base_calc=None):
    table = []
    row_years = []
    calc = copy.deepcopy(self)
    base_calc = copy.deepcopy(base_calc)
    for i in range(0, num_years):
        if calc.behavior.has_response():
            base_calc.calc_all()
            behavior_calc = Behavior.response(base_calc, calc)
            behavior_calc.diagnostic_table_items(table)
        else:
            calc.calc_all()
            calc.diagnostic_table_items(table)
        row_years.append(calc.policy.current_year)
        if i < num_years - 1:
            calc.increment_year()
            if base_calc is not None:
                base_calc.increment_year()
    df = DataFrame(table, row_years,
                   ['Returns (#m)', 'AGI ($b)', 'Itemizers (#m)',
                    'Itemized Deduction ($b)',
                    'Standard Deduction Filers (#m)',
                    'Standard Deduction ($b)', 'Personal Exemption ($b)',
                    'Taxable income ($b)', 'Regular Tax ($b)',
                    'AMT income ($b)', 'AMT amount ($b)', 'AMT number (#m)',
                    'Tax before credits ($b)', 'refundable credits ($b)',
                    'nonrefundable credits ($b)', 'Misc. Surtax ($b)',
                    'Ind inc tax ($b)', 'Payroll tax ($b)',
                    'Combined liability ($b)'])
    df = df.transpose()
    pd.options.display.float_format = '{:8,.1f}'.format
    return df
def make_league_df():
    from pandas import DataFrame
    from members import import_teams

    div_teams = import_teams()
    div_bins = load_all_substitute_bins()
    bins = sum(div_bins, [])

    targets = ['Jessica', 'Lexie']
    results = {}
    for target in targets:
        for bin_num, bin in enumerate(bins):
            for person in bin:
                if target in person:
                    break
            if target in results:
                break

    from pprint import pprint
    people = {}

    def find_team(name, teams):
        for team_idx, team in enumerate(teams):
            for person in team:
                if name in person:
                    return team_idx
        return -1

    for div_idx, (bins, teams) in enumerate(zip(div_bins, div_teams)):
        for bin_idx, bin in enumerate(bins):
            for person in bin:
                team_idx = find_team(person, teams)
                int_team = find_team(person, div_teams[1])
                people[person] = {'div': div_idx, 'bin': bin_idx,
                                  'team': team_idx, 'int_team': int_team}

    df = DataFrame(people)
    df = df.transpose()

    comp_busy = [4, 6, 7]    # really, 5, 7, 8
    int_busy = [11, 9, 1]    # really, 12, 10, 2

    print('\n\nbusy')
    brian_int = df.loc[df['bin'] == 0].loc[df['div'] == 2].loc[
        df['int_team'].isin(int_busy)]
    lexies = df.loc[df['bin'] == 1].loc[df['div'] == 2].loc[
        df['team'].isin(comp_busy)]
    jeses = df.loc[df['bin'] == 0].loc[df['div'] == 2].loc[
        df['team'].isin(comp_busy)]
    pprint(", ".join(brian_int.index.values))
    pprint(", ".join(lexies.index.values))
    pprint(", ".join(jeses.index.values))

    print('\n\nyes')
    brian_int = df.loc[df['bin'] == 0].loc[df['div'] == 2].loc[
        ~df['int_team'].isin(int_busy)]
    lexies = df.loc[df['bin'] == 1].loc[df['div'] == 2].loc[
        ~df['team'].isin(comp_busy)]
    jeses = df.loc[df['bin'] == 0].loc[df['div'] == 2].loc[
        ~df['team'].isin(comp_busy)]
    pprint(len(brian_int))
    pprint(len(lexies))
    pprint(len(jeses))
    print(", ".join(brian_int.index.values))
    print(", ".join(lexies.index.values))
    print(", ".join(jeses.index.values))
def testDataFrameNumpy(self):
    df = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"],
                   columns=["x", "y", "z"])

    # column indexed
    outp = DataFrame(ujson.decode(ujson.encode(df), numpy=True))
    self.assertTrue((df == outp).values.all())
    assert_array_equal(df.columns, outp.columns)
    assert_array_equal(df.index, outp.index)

    dec = _clean_dict(ujson.decode(ujson.encode(df, orient="split"),
                                   numpy=True))
    outp = DataFrame(**dec)
    self.assertTrue((df == outp).values.all())
    assert_array_equal(df.columns, outp.columns)
    assert_array_equal(df.index, outp.index)

    outp = DataFrame(ujson.decode(ujson.encode(df, orient="index"),
                                  numpy=True))
    self.assertTrue((df.transpose() == outp).values.all())
    assert_array_equal(df.transpose().columns, outp.columns)
    assert_array_equal(df.transpose().index, outp.index)
def parse_data_model(self, full_df):
    data_model = {}
    levels = ['specimens', 'samples', 'sites', 'locations',
              'ages', 'measurements', 'criteria', 'contribution']
    for level in levels:
        df = DataFrame(full_df['tables'][level]['columns'])
        data_model[level] = df.transpose()
        # replace np.nan with None
        data_model[level] = data_model[level].where(
            (pd.notnull(data_model[level])), None)
    return data_model
def append_village_areas(divname):
    im_vil = pd.read_csv('../data/%s_village_images.csv' % divname.lower())
    shape_helper = ShapeHelper(
        '../data/shapefiles/fixed_village_shapefiles/%s/%s.shp'
        % (divname.lower(), divname.lower()),
        lat_offset, lon_offset)
    areas = shape_helper.get_shape_areas('village')
    areas_df = DataFrame(areas, index=['area'])
    areas_df = areas_df.transpose()
    areas_df.reset_index(inplace=True)
    areas_df.rename(columns={'index': 'village'}, inplace=True)
    im_vil_areas = pd.merge(im_vil, areas_df, how='left')
    im_vil_areas.set_index('image', inplace=True)
    im_vil_areas.to_csv('../data/%s_village_areas_images.csv'
                        % divname.lower())
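# Minimal, self-contained version of the reshape used above (the sample data
# is assumed for illustration): a dict of scalars keyed by village becomes a
# one-row frame, and transpose + reset_index + rename turns it into one row
# per village with a proper "village" column.
import pandas as pd

areas = {"villA": 1.5, "villB": 2.0}
areas_df = pd.DataFrame(areas, index=["area"]).transpose()
areas_df.reset_index(inplace=True)
areas_df.rename(columns={"index": "village"}, inplace=True)
print(areas_df)  # columns: village, area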
def annotate(ann, ccols, ocols, clust, c):
    to_add = open(ann, 'r')
    head = next(to_add)
    head = head.rstrip('\n')
    bids = head.split('\t')
    # SHOULD HAVE ROW HEADERS
    Cols = bids[1:]
    maps = ('Reds', 'Reds', 'Greys', 'Greens')
    k = 0
    annot = []
    for line in to_add:
        line = line.rstrip('\n')
        data = line.split('\t')
        to_map = data[1:]
        rmap = []
        newCols = []
        # reorg data to match cluster
        for i in ccols:
            rmap.append(to_map[Cols.index(ocols[i])])
            newCols.append(ocols[i])
        rmap = np.asarray(rmap)
        Rows = []
        Rows.append(data[0])
        # flag if qualitative
        q = 0
        if isint(rmap[0]):
            rmap = rmap.astype(np.float)
        else:
            q = 1
            qdict = {}
            j = 0
            for i in xrange(0, len(rmap), 1):
                if rmap[i] not in qdict:
                    qdict[rmap[i]] = j
                    sys.stderr.write(str(j) + ' ' + rmap[i] + '\n')
                    j += 1
                rmap[i] = qdict[rmap[i]]
            rmap = rmap.astype(np.float)
        df = DataFrame(rmap, index=ocols, columns=Rows)
        df = df.transpose()
        new, cur = plt.subplots()
        cur = sns.heatmap(df, cmap=maps[k], rasterized=True)
        new.set_figheight(2)
        new.set_figwidth(c)
        new.set_dpi(600)
        cur.set_xticklabels(newCols, rotation=90)
        new.savefig('test' + str(k) + '.pdf')
        annot.append(new)
        k += 1
    return annot
def parse_data_model(self, full_df):
    """
    Format the data model into a dictionary of DataFrames.
    """
    data_model = {}
    levels = ['specimens', 'samples', 'sites', 'locations', 'ages',
              'measurements', 'criteria', 'contribution', 'images']
    criteria_map = DataFrame(full_df['criteria_map'])
    for level in levels:
        df = DataFrame(full_df['tables'][level]['columns'])
        data_model[level] = df.transpose()
        # replace np.nan with None
        data_model[level] = data_model[level].where(
            (pd.notnull(data_model[level])), None)
    return data_model, criteria_map
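# Small sketch of the NaN -> None step used in both parse_data_model variants
# above (an assumption: with the pandas versions this code targets, where()
# against None casts the frame to object dtype, so cells hold a true None
# rather than a float NaN, which matters when the values are later serialized).
import numpy as np
import pandas as pd

df = pd.DataFrame({"col_a": ["x", np.nan], "col_b": [1.0, 2.0]})
cleaned = df.where(pd.notnull(df), None)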
def file_prep(file):
    df = DataFrame(read_csv(file, sep='\t'))
    df.drop(df[df.apply(allele_count, axis=1) != 2].index, inplace=True)
    major_freqs = df.apply(major_prop_find, axis=1)
    major_alleles = df.apply(major_find, axis=1)
    df.insert(3, 'major_freqs', major_freqs)
    df.insert(3, 'major_alleles', major_alleles)
    df = df.transpose()
    chrom, chrom_idx = np.unique(df.loc['chrom'], return_index=True)
    super_missing_df = df == '.'
    chromosome_dict = {}
    for number in np.unique(df.loc['chrom']):
        chromosome_dict[number] = df.loc['chrom'][df.loc['chrom'] == number].index
    return df, super_missing_df, chromosome_dict
def main():
    train_set = create_dataset(N)
    test_set = create_dataset(N)
    df_ws = DataFrame()

    fig = plt.figure()
    for c, m in enumerate(M):
        f, ws = resolve(train_set, m)
        df_ws = df_ws.append(Series(ws, name="M=%d" % m))

        subplot = fig.add_subplot(2, 2, c + 1)
        subplot.set_xlim(-0.05, 1.05)
        subplot.set_ylim(-1.5, 1.5)
        subplot.set_title("M=%d" % m)
        subplot.scatter(train_set.x, train_set.y, marker='o', color='blue')

        linex = np.linspace(0, 1, 101)
        liney = np.sin(2 * np.pi * linex)
        subplot.plot(linex, liney, color='green', linestyle='--')

        linex = np.linspace(0, 1, 101)  # like probability density function
        liney = f(linex)
        label = "E(RMS)=%.2f" % rmse(train_set, f)
        subplot.plot(linex, liney, color='red', label=label)
        subplot.legend(loc=1)

    print("Table of the coefficients")
    print(df_ws.transpose())
    fig.show()

    df = DataFrame()
    for m in range(0, 10):
        f, ws = resolve(train_set, m)
        train_error = rmse(train_set, f)
        test_error = rmse(test_set, f)
        df = df.append(
            Series([train_error, test_error],
                   index=['Training set', 'Test set']),
            ignore_index=True)
    df.plot(title='RMS Error', style=['-', '--'], grid=True, ylim=(0, 0.9))
    plt.show()
def apply(self, transforms, axis=0):
    if isinstance(transforms, Transform) \
            or \
            (isinstance(transforms, type) and issubclass(transforms, Transform)):
        transform = transforms  # only a single object passed (not a list)
        return transform.__eapply__(self)
    elif isinstance(transforms, (types.FunctionType,
                                 types.BuiltinFunctionType,
                                 functools.partial)):
        func = transforms  # only a single object passed (not a list)
        transformed_data_df = DataFrame(self.data_df.apply(func, axis=axis))
        # transpose to return the samples as column names rather than row names
        if axis == 0:
            transformed_data_df = transformed_data_df.transpose()
        return self.with_data_df(transformed_data_df)
    elif isinstance(transforms, list):
        transformed_exp = self
        for transform in transforms:
            transformed_exp = transform.__eapply__(transformed_exp)
        return transformed_exp
    else:
        raise NotImplementedError
def save(filename, results):
    filename += '.xlsx'
    df = DataFrame(data=results)
    # print(df)
    df = df.transpose()
    df.to_excel(filename, sheet_name='sheet1', index=False, header=False)
def buy6030(self, sym, direction="Bull", exp="", budget=500):
    if direction == "Bull":
        right = "Put"
    else:
        right = "Call"

    if exp == "":
        d = datetime.date.today()
        d += datetime.timedelta(10)
        while d.weekday() != 4:
            d += datetime.timedelta(1)
        exp = d.strftime("%Y%m%d")

    contract1 = IBcontract()
    contract1.secType = "STK"
    contract1.symbol = sym
    contract1.exchange = "ISLAND"

    contract2 = IBcontract()
    contract2.secType = "OPT"
    contract2.symbol = sym
    contract2.exchange = "SMART"
    contract2.lastTradeDateOrContractMonth = exp
    contract2.right = right
    contract2.multiplier = 100

    self.reqMktData(1032, contract1, "", False, False, [])
    contract1.exchange = "SMART"
    self.reqMktData(1033, contract1, "", False, False, [])
    d = self.reqContractDetails(1202, contract2)
    time.sleep(1)
    # print(d)

    print("=" * 40)
    print()
    print("{} Price Details:".format(sym))
    lastPrice = None
    try:
        for k in list(self._my_price_details[1032].queue):
            t = dict(k)
            if t['tickType'] == 4:
                lastPrice = t['price']
            if t['tickType'] == 9 and lastPrice == None:
                lastPrice = t['price']
            print(t)
    except:
        try:
            for k in list(self._my_price_details[1033].queue):
                t = dict(k)
                if t['tickType'] == 4:
                    lastPrice = t['price']
                if t['tickType'] == 9 and lastPrice == None:
                    lastPrice = t['price']
                print(t)
        except:
            print("No stock prices available for {} at this time.".format(sym))
            return
    if lastPrice == None:
        print("No stock prices available for {} at this time.".format(sym))
        return

    # print()
    # print("{0} Last Price: ${1:4.2f}".format(sym, lastPrice))
    # print()

    rID = 1100
    df = DataFrame()
    print("Contract Details:")
    try:
        cDetails = self._my_contract_details[1202].queue
    except:
        print("Contract details for {} are not available at this time.".format(sym))
        return
    for k in list(cDetails):
        t = list(str(k).split(','))
        # print(t)
        try:
            if lastPrice * 1.10 > float(t[4]) > lastPrice * 0.90:
                df[rID] = t
                contract3 = IBcontract()
                contract3.secType = "OPT"
                contract3.symbol = sym
                contract3.exchange = "CBOE2"
                contract3.lastTradeDateOrContractMonth = exp
                contract3.strike = float(t[4])
                contract3.right = right
                contract3.multiplier = 100
                self.reqMarketDataType(2)
                self.reqMktData(rID, contract3, "", False, False, [])
                rID = rID + 1
        except:
            pass
    if rID == 1100:
        print("No option prices available for {} at this time.".format(sym))
        return

    df = df.transpose()
    # print(df)
    # print("Getting option details for {0:2d} strikes:".format(len(df)))
    # print()
    time.sleep(1)

    df['undPrice'] = [""] * len(df)
    df['optPrice'] = [""] * len(df)
    df['delta'] = [""] * len(df)
    df['strike'] = [""] * len(df)
    df['delta60'] = [""] * len(df)
    for s in df.index:
        # self.cancelMktData(s)
        try:
            for k in list(self._my_option_data[s].queue):
                t = dict(k)
                # print(s, t)
                if t['delta']:
                    try:
                        df.loc[s, 'conId'] = int(df.loc[s, 0])
                        df.loc[s, 'strike'] = float(df.loc[s, 4])
                        df.loc[s, 'undPrice'] = t['undPrice']
                        df.loc[s, 'optPrice'] = t['optPrice']
                        df.loc[s, 'delta'] = abs(t['delta'])
                        df.loc[s, 'delta60'] = abs(abs(t['delta']) - 0.60)
                    except:
                        pass
        except:
            print("No option prices available for {} at this time.".format(sym))
            return

    # print(df.loc[:, ['conId', 3, 'strike', 'undPrice', 'delta', 'delta60']].sort_values(['strike']))
    # print()
    d60 = df.loc[df['delta60'] == df['delta60'].min()].index.min()
    # print("Sell a {} with the {:7.2f} strike".format(right, df.strike[d60]))
    t30 = (df.delta[d60] - 0.3)
    p = df.loc[df.delta > t30].delta.min()
    d30plus = df.loc[df.delta == p].index.min()
    m = df.loc[df.delta < t30].delta.max()
    d30min = df.loc[df.delta == m].index.min()
    if abs(df.delta[d30plus] - t30) > abs(df.delta[d30min] - t30):
        d30 = d30min
    else:
        d30 = d30plus

    # Order variables #####
    cdelta = df.delta[d60] - df.delta[d30]
    lim = abs(df.strike[d60] - df.strike[d30]) * 0.35
    try:
        cOptPrice = df.optPrice[d60] - df.optPrice[d30]
        if abs(cOptPrice) < abs(lim * 0.95):
            print("Spread Combo price for {} is too low.".format(sym))
            return True
        quantity = int(budget / 100 / cOptPrice)
        if quantity == 0:
            print("Spread Combo for {} is above the budget of ${}".format(sym, budget))
            return True
    except:
        quantity = 1
    takeProfitLimitPrice = lim * 0.
    stopLossPrice = lim * 1.50
    action = "SELL"
    # parentOrderId = 101

    # print("Buy a {} with the {:7.2f} strike ".format(right, df.strike[d30]))
    # print("Combo delta is {:5.3f}".format(cdelta))
    # print("Combo limit price is ${:7.2f}".format(lim))
    # print("Combo Expiry is {}".format(exp))
    # print()
    print("{} - Price: ${:7.2f} - Sell a {} {:7.2f}/{:7.2f} {} Spread - Limit price: ${:5.2f} - Combo delta: {:5.3f}"
          .format(sym, lastPrice, exp, df.strike[d60], df.strike[d30], right, lim, cdelta))

    # Send order for the Spread above ####
    contract3 = IBcontract()
    contract3.secType = "BAG"
    contract3.symbol = sym
    contract3.exchange = "SMART"
    contract3.currency = "USD"

    leg1 = IBcomboLeg()
    leg1.conId = int(df.conId[d60])  # Sell the delta 60 option
    leg1.ratio = 1
    leg1.action = "SELL" if action == "BUY" else "BUY"
    leg1.exchange = "SMART"

    leg2 = IBcomboLeg()
    leg2.conId = int(df.conId[d30])  # Buy the delta 30 option as protection
    leg2.ratio = 1
    leg2.action = "BUY" if action == "BUY" else "SELL"
    leg2.exchange = "SMART"

    contract3.comboLegs = []
    contract3.comboLegs.append(leg1)
    contract3.comboLegs.append(leg2)

    order3 = Order()
    order3.action = action
    order3.orderType = "LMT"
    order3.totalQuantity = quantity
    order3.lmtPrice = lim
    order3.tif = 'DAY'
    order3.transmit = False
    parentOrderId = self.place_new_IB_order(contract3, order3, orderid=None)

    takeProfit = Order()
    takeProfit.action = "SELL" if action == "BUY" else "BUY"
    takeProfit.orderType = "LMT"
    takeProfit.totalQuantity = quantity
    takeProfit.lmtPrice = takeProfitLimitPrice
    takeProfit.parentId = parentOrderId
    takeProfit.tif = 'GTC'
    takeProfit.transmit = False
    self.place_new_IB_order(contract3, takeProfit, orderid=None)

    stopLoss = Order()
    stopLoss.action = "SELL" if action == "BUY" else "BUY"
    stopLoss.orderType = "STP"
    # Stop trigger price
    stopLoss.auxPrice = stopLossPrice
    stopLoss.totalQuantity = quantity
    stopLoss.parentId = parentOrderId
    stopLoss.tif = 'GTC'
    # In this case, the low side order will be the last child being sent.
    # Therefore, it needs to set this attribute to True
    # to activate all its predecessors.
    stopLoss.transmit = True
    self.place_new_IB_order(contract3, stopLoss, orderid=None)
    time.sleep(1)
    return True
def ComputeMetrics1(stats, filename):
    """
    DESCRIPTION

    :Parameters:
        NAME : TYPE
            DESCRIPTION

    :Return:
        DESCRIPTION
    """
    data = {}
    for article in stats:
        metrics = {}
        temp = {}
        title = article['article-title']

        # get metrics from data
        allActions = GetMetric(article, 'total-actions')
        number_tokens = GetMetric(article, 'number-tokens')
        maintainanceTag = GetMetric(article, 'tag-maintained')

        # split metrics between maintainer and others
        addsMaintainer, addsOthers = SplitMO(article, maintainers[index], 'tokens-added')
        deletesMaintainer, deletesOthers = SplitMO(article, maintainers[index], 'tokens-deleted')
        revertsMaintainer, revertsOthers = SplitMO(article, maintainers[index], 'tokens-reverted')
        antActionsMaintainer, antActionsOthers = SplitMO(article, maintainers[index], 'antagonistic-actions')
        reintroMaintainer, reintroOthers = SplitMO(article, maintainers[index], 'tokens-reintroduced')
        selfreintroMaintainer, selfreintroOthers = SplitMO(article, maintainers[index], 'tokens-self-reintroduced')
        talkpageMaintainer, talkpageOthers = SplitMO(article, maintainers[index], 'talkpage-edits')

        ownershipMaintainerAbs = GetOwnership(article, maintainers_id[index], 'tokens-absolute')
        ownershipMaintainerRel = GetOwnership(article, maintainers_id[index], 'tokens-relative')

        # get properties of article
        metrics['firstMaintRev'] = GetFirstMaintainedRev(maintainanceTag)
        metrics['maintainer-name'] = article['maintainer-name']
        metrics['maintainer-id'] = article['maintainer-id']
        metrics['all-actions'] = sum(allActions)
        metrics['edits-maintainer'] = len(addsMaintainer)
        metrics['edits-others'] = len(addsOthers)
        metrics['number-revisions'] = metrics['edits-maintainer'] + metrics['edits-others']

        # temporal comparison
        TempCompare()

        # relativizing by edits is just an assumption, to have something
        if talkpageOthers:
            # metrics['talkPageRatio'] = sum(talkpageMaintainer) / float(metrics['edits-maintainer']) / float(sum(talkpageOthers) / float(metrics['edits-others']))
            metrics['talkPageRatio'] = sum(talkpageMaintainer) / float(sum(talkpageOthers))
        else:
            metrics['talkPageRatio'] = 0

        # if metrics['all-actions'] is 0:
        #     metrics['addsMaintainerAvg'] = 0
        #     metrics['addsOthersAvg'] = 0
        #     metrics['addsRatio'] = 0
        #     metrics['deletesMaintainerRel'] = 0
        #     metrics['deletesOthersRel'] = 0
        #     metrics['deletesRatio'] = 0
        #     metrics['revertsMaintainerRel'] = 0
        #     metrics['revertsOthersRel'] = 0
        #     metrics['revertsRatio'] = 0
        #     metrics['reintroMaintainerAvg'] = 0
        #     metrics['reintroOthersAvg'] = 0
        #     metrics['selfreintroMaintainerAvg'] = 0
        #     metrics['selfreintroOthersAvg'] = 0
        #     metrics['selfreintroRatio'] = 0
        #     metrics['antActionsMaintainerAvg'] = 0
        #     metrics['antActionsOthersAvg'] = 0
        #     metrics['negActionsRatio'] = 0
        #     metrics['targetedIntroRatio'] = 0
        # metrics['addsMaintainerRel'] = sum(addsMaintainer)/float(metrics['all-actions'])
        # metrics['addsOthersRel'] = sum(addsOthers)/float(metrics['all-actions'])
        # metrics['addsRatio'] = metrics['addsMaintainerRel'] / float(metrics['addsOthersRel'])
        # metrics['deletesMaintainerRel'] = sum(deletesMaintainer)/float(metrics['all-actions'])
        # metrics['deletesOthersRel'] = sum(deletesOthers)/float(metrics['all-actions'])
        # metrics['deletesRatio'] = metrics['deletesMaintainerRel'] / float(metrics['deletesOthersRel'])
        # metrics['revertsMaintainerRel'] = sum(revertsMaintainer)/float(metrics['all-actions'])
        # metrics['revertsOthersRel'] = sum(revertsOthers)/float(metrics['all-actions'])
        # metrics['revertsRatio'] = metrics['revertsMaintainerRel'] / float(metrics['revertsOthersRel'])
        # metrics['reintroMaintainerRel'] = sum(reintroMaintainer)/float(metrics['all-actions'])
        # metrics['reintroOthersRel'] = sum(reintroOthers)/float(metrics['all-actions'])
        # metrics['selfreintroMaintainerRel'] = sum(selfreintroMaintainer)/float(metrics['all-actions'])
        # metrics['selfreintroOthersRel'] = sum(selfreintroOthers)/float(metrics['all-actions'])
        # if metrics['selfreintroOthersAvg'] == 0:
        #     metrics['selfreintroRatio'] = 0
        # else:
        #     metrics['selfreintroRatio'] = metrics['selfreintroMaintainerAvg'] / float(metrics['selfreintroOthersAvg'])
        # if metrics['antActionsOthersAvg'] == 0:
        #     metrics['antActionsRatio'] = 0
        # else:
        #     metrics['antActionsRatio'] = metrics['antActionsMaintainerAvg'] / float(metrics['antActionsOthersAvg'])
        # if metrics['reintroMaintainerAvg'] == 0 or metrics['selfreintroOthersAvg'] == 0 or metrics['reintroOthersAvg'] == 0:
        #     metrics['targetedIntroRatio'] = 0
        #     metrics['targetedIntroRatio2Ownership'] = 0
        # else:
        #     metrics['targetedIntroRatio'] = (metrics['selfreintroMaintainerAvg'] / float(metrics['reintroMaintainerAvg'])) \
        #         / float((metrics['selfreintroOthersAvg'] / float(metrics['reintroOthersAvg'])))
        #     # metrics['targetedIntroRatio2Ownership'] = (metrics['selfreintroMaintainerRel'] / float(metrics['reintroMaintainerRel'])) \
        #     #     / float((metrics['selfreintroOthersRel'] / float(metrics['reintroOthersRel'])))

        metrics['addsMaintainerAvg'] = sum(addsMaintainer) / float(metrics['edits-maintainer'])
        metrics['addsOthersAvg'] = sum(addsOthers) / float(metrics['edits-others'])
        metrics['addsRatio'] = metrics['addsMaintainerAvg'] / float(metrics['addsOthersAvg'])
        metrics['reintroMaintainerAvg'] = sum(reintroMaintainer) / float(metrics['edits-maintainer'])
        metrics['reintroOthersAvg'] = sum(reintroOthers) / float(metrics['edits-others'])
        metrics['reintroRatio'] = metrics['reintroMaintainerAvg'] / float(metrics['reintroOthersAvg'])
        metrics['selfreintroMaintainerAvg'] = sum(selfreintroMaintainer) / float(metrics['edits-maintainer'])
        metrics['selfreintroOthersAvg'] = sum(selfreintroOthers) / float(metrics['edits-others'])
        metrics['selfreintroRatio'] = metrics['selfreintroMaintainerAvg'] / float(metrics['selfreintroOthersAvg'])
        metrics['antActionsMaintainerAvg'] = sum(antActionsMaintainer) / float(metrics['edits-maintainer'])
        metrics['antActionsOthersAvg'] = sum(antActionsOthers) / float(metrics['edits-others'])

        # metrics['deletesMaintainerAvg'] = sum(deletesMaintainer)/float(metrics['edits-maintainer'])
        # metrics['deletesOthersAvg'] = sum(deletesOthers)/float(metrics['edits-others'])
        # metrics['deletesRatio'] = sum(metrics['deletesMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(temp['deletesOthersAvg']) / float(metrics['edits-others']))
        # metrics['revertsMaintainerAvg'] = sum(revertsMaintainer)/float(metrics['edits-maintainer'])
        # metrics['revertsOthersAvg'] = sum(revertsOthers)/float(metrics['edits-others'])
        # metrics['revertsRatio'] = sum(metrics['revertsMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['revertsOthersAvg']) / float(metrics['edits-others']))
        # metrics['revertsMaintainerPot'] = sum(revertsMaintainer)/float(metrics['edits-maintainer'])
        # metrics['revertsOthersPot'] = sum(revertsOthers)/float(metrics['edits-others'])
        # metrics['revertsPotRatio'] = sum(metrics['revertsMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['revertsOthersAvg']) / float(metrics['edits-others']))
        # metrics['reintroMaintainerAvg'] = sum(reintroMaintainer)/float(metrics['edits-maintainer'])
        # metrics['reintroOthersAvg'] = sum(reintroOthers)/float(metrics['edits-others'])
        # metrics['reintroRatio'] = sum(metrics['reintroMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['reintroOthersAvg']) / float(metrics['edits-others']))
        # metrics['selfreintroMaintainerAvg'] = sum(selfreintroMaintainer)/float(metrics['edits-maintainer'])
        # metrics['selfreintroOthersAvg'] = sum(selfreintroOthers)/float(metrics['edits-others'])
        # metrics['selfreintroRatio'] = sum(metrics['selfreintroMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['selfreintroOthersAvg']) / float(metrics['edits-others']))
        # share of selfreintroductions of potential own tokens
        # temp['selfreintroMaintainerPot'] = [(b/float(a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], selfreintroMaintainer[1:len(selfreintroMaintainer)-1])]
        # temp['selfreintroOthersPot'] = [(b/float(c-a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], selfreintroOthers[1:len(selfreintroOthers)-1], number_tokens[:len(number_tokens)-2) if a is not 0]
        # metrics['selfreintroPotRatio'] = sum(temp['selfreintroMaintainerPot']) / float(metrics['edits-maintainer']) / float(sum(temp['selfreintroOthersPot']) / float(metrics['edits-others']))
        # temp['antActionsMaintainerPot'] = [(b/float(a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], antActionsMaintainer[1:len(antActionsMaintainer)-1]) if a is not 0]
        # temp['antActionsOthersPot'] = [(b/float(c-a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], antActionsOthers[1:len(antActionsOthers)-1], number_tokens[:len(number_tokens)-2]) if a is not 0]
        # metrics['antActionsRatio'] = sum(temp['antActionsMaintainerPot']) / float(metrics['edits-maintainer']) / float(sum(temp['antActionsOthersPot']) / float(metrics['edits-others']))

        data[title] = metrics

    data = DataFrame(data)
    data = data.transpose()
    save2CSV(data, filename)
    return data
path = "C:\\Users\\keriambermudez\\Dropbox\\David_Fenyos_Lab\\Image_Analysis\\Testing_Jacop\\Basal_bleomycin_masked_cvs\\"

#%%
basal = pd.read_csv("C:\\Users\\keriambermudez\\Dropbox\\David_Fenyos_Lab\\Image_Analysis\\Testing_Jacop\\Basal\\Masked_log_files\\Basal_masked_jacop.csv")
bleomycin = pd.read_csv("C:\\Users\\keriambermudez\\Dropbox\\David_Fenyos_Lab\\Image_Analysis\\Testing_Jacop\\Bleomycyn\\Masked_log_files\\Bleomycin_masked_jacop.csv")

#%%
icq = DataFrame([basal.ix[:, '43'], bleomycin.ix[:, '43']],
                index=['basal', 'bleomycin'])
icq = icq.transpose()
icq.to_csv(path + "ICQ.csv")

pearsons = DataFrame([basal.ix[:, '3'], bleomycin.ix[:, '3']],
                     index=['basal', 'bleomycin'])
pearsons = pearsons.transpose()
pearsons.to_csv(path + "pearsons.csv")

overlap_coef = DataFrame([basal.ix[:, '5'], bleomycin.ix[:, '5']],
                         index=['basal', 'bleomycin'])
overlap_coef = overlap_coef.transpose()
overlap_coef.to_csv(path + "overlap_coef.csv")

M1 = DataFrame([basal.ix[:, '16'], bleomycin.ix[:, '16']],
               index=['basal', 'bleomycin'])
M1 = M1.transpose()
subplot.scatter(train_set.x, train_set.y, marker='o', color='blue')

# Plot the true curve
linex = np.linspace(0, 1, 101)
liney = np.sin(2 * np.pi * linex)
subplot.plot(linex, liney, color='green', linestyle='--')

# Plot the polynomial approximation
linex = np.linspace(0, 1, 101)
liney = f(linex)
label = "E(RMS)=%.2f" % rms_error(train_set, f)
subplot.plot(linex, liney, color='red', label=label)
subplot.legend(loc=1)

# Print the coefficient values
print "Table of the coefficients"
print df_ws.transpose()
fig.show()

# Plot how the error changes on the training set and the test set
df = DataFrame(columns=['Training set', 'Test set'])
for m in range(0, 10):  # degree of the polynomial
    f, ws = resolve(train_set, m)
    train_error = rms_error(train_set, f)
    test_error = rms_error(test_set, f)
    df = df.append(Series([train_error, test_error],
                          index=['Training set', 'Test set']),
                   ignore_index=True)
df.plot(title='RMS Error', style=['-', '--'], grid=True, ylim=(0, 0.9))
plt.show()
##
## a = np.zeros((len(c), len(d)))
##
## keys = []
## keys2 = []
##
## for key, values in pin.iteritems():
##     keys.append(key)
##     for key2, value in values.iteritems():
##         if key2 not in keys2:
##             keys2.append(key2)
##
## for key, values in pin.iteritems():
##     index = keys.index(key)
##     for key2, value in values.iteritems():
##         index2 = keys2.index(key2)
##         a[index][index2] = values.get(key2, 0)
##
## np.savetxt("pinmat.csv", a, delimiter=",", fmt='%2i')

# Write Jaccard index Matrix
print "Dataframe"
df2 = DataFrame(pinjac)
print "transpose"
df2t = df2.transpose()
print "to csv"
df2t.to_csv('pinsJac.csv')

cur.close()
conn.close()
def test_frame_from_json_to_json(self):
    def _check_orient(df, orient, dtype=None, numpy=False,
                      convert_axes=True, check_dtype=True, raise_ok=None):
        df = df.sort()
        dfjson = df.to_json(orient=orient)
        try:
            unser = read_json(dfjson, orient=orient, dtype=dtype,
                              numpy=numpy, convert_axes=convert_axes)
        except Exception as detail:
            if raise_ok is not None:
                if isinstance(detail, raise_ok):
                    return
            raise
        unser = unser.sort()
        if dtype is False:
            check_dtype = False
        if not convert_axes and df.index.dtype.type == np.datetime64:
            unser.index = DatetimeIndex(
                unser.index.values.astype('i8') * 1e6)
        if orient == "records":
            # index is not captured in this orientation
            assert_almost_equal(df.values, unser.values)
            self.assertTrue(df.columns.equals(unser.columns))
        elif orient == "values":
            # index and cols are not captured in this orientation
            assert_almost_equal(df.values, unser.values)
        elif orient == "split":
            # index and col labels might not be strings
            unser.index = [str(i) for i in unser.index]
            unser.columns = [str(i) for i in unser.columns]
            unser = unser.sort()
            assert_almost_equal(df.values, unser.values)
        else:
            if convert_axes:
                assert_frame_equal(df, unser, check_dtype=check_dtype)
            else:
                assert_frame_equal(df, unser, check_less_precise=False,
                                   check_dtype=check_dtype)

    def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None):
        # numpy=False
        if convert_axes:
            _check_orient(df, "columns", dtype=dtype)
            _check_orient(df, "records", dtype=dtype)
            _check_orient(df, "split", dtype=dtype)
            _check_orient(df, "index", dtype=dtype)
            _check_orient(df, "values", dtype=dtype)
        _check_orient(df, "columns", dtype=dtype, convert_axes=False)
        _check_orient(df, "records", dtype=dtype, convert_axes=False)
        _check_orient(df, "split", dtype=dtype, convert_axes=False)
        _check_orient(df, "index", dtype=dtype, convert_axes=False)
        _check_orient(df, "values", dtype=dtype, convert_axes=False)

        # numpy=True and raise_ok might be not None, so ignore the error
        if convert_axes:
            _check_orient(df, "columns", dtype=dtype, numpy=True,
                          raise_ok=raise_ok)
            _check_orient(df, "records", dtype=dtype, numpy=True,
                          raise_ok=raise_ok)
            _check_orient(df, "split", dtype=dtype, numpy=True,
                          raise_ok=raise_ok)
            _check_orient(df, "index", dtype=dtype, numpy=True,
                          raise_ok=raise_ok)
            _check_orient(df, "values", dtype=dtype, numpy=True,
                          raise_ok=raise_ok)
        _check_orient(df, "columns", dtype=dtype, numpy=True,
                      convert_axes=False, raise_ok=raise_ok)
        _check_orient(df, "records", dtype=dtype, numpy=True,
                      convert_axes=False, raise_ok=raise_ok)
        _check_orient(df, "split", dtype=dtype, numpy=True,
                      convert_axes=False, raise_ok=raise_ok)
        _check_orient(df, "index", dtype=dtype, numpy=True,
                      convert_axes=False, raise_ok=raise_ok)
        _check_orient(df, "values", dtype=dtype, numpy=True,
                      convert_axes=False, raise_ok=raise_ok)

    # basic
    _check_all_orients(self.frame)
    self.assertEqual(self.frame.to_json(),
                     self.frame.to_json(orient="columns"))
    _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
    _check_all_orients(self.intframe, dtype=False)

    # big one
    # index and columns are strings as all unserialised JSON object keys
    # are assumed to be strings
    biggie = DataFrame(np.zeros((200, 4)),
                       columns=[str(i) for i in range(4)],
                       index=[str(i) for i in range(200)])
    _check_all_orients(biggie, dtype=False, convert_axes=False)

    # dtypes
    _check_all_orients(DataFrame(biggie, dtype=np.float64),
                       dtype=np.float64, convert_axes=False)
    _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int,
                       convert_axes=False)
    _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3',
                       convert_axes=False, raise_ok=ValueError)

    # empty
    _check_all_orients(self.empty_frame)

    # time series data
    _check_all_orients(self.tsframe)

    # mixed data
    index = pd.Index(['a', 'b', 'c', 'd', 'e'])
    data = {
        'A': [0., 1., 2., 3., 4.],
        'B': [0., 1., 0., 1., 0.],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': [True, False, True, False, True]
    }
    df = DataFrame(data=data, index=index)
    _check_orient(df, "split", check_dtype=False)
    _check_orient(df, "records", check_dtype=False)
    _check_orient(df, "values", check_dtype=False)
    _check_orient(df, "columns", check_dtype=False)
    # index oriented is problematic as it is read back in in a transposed
    # state, so the columns are interpreted as having mixed data and
    # given object dtypes.
    # force everything to have object dtype beforehand
    _check_orient(df.transpose().transpose(), "index", dtype=False)
class Sensor():
    def __init__(self, sensor_id, distance, dbReader):
        self.sensor_id = sensor_id
        self.distance = distance
        # print "Sensor ID: ", sensor_id
        self.dbReader = dbReader
        self.spectrum_info = DataFrame()
        self.normal_info = DataFrame()
        self.calculated_info = DataFrame()
        self.count = 1
        self.state = "PASSIVE"
        self.last_active = 0
        self.clock_diff = 0
        self.unavailable_frequencies = []
        self.occupied_frequencies = []
        self.table_name = "channelinfo_" + str(self.sensor_id)
        self.channelInfo = DataFrame()
        self.log_count = 0
        self.last_active_time = 0
        self.potential = dict()
        self.cbsd_pu = dict()
        self.longitude = 0
        self.latitude = 0
        self.distUpdate = False
        self.last_loc_update = 0
        self.calc_psd = dict()
        self.spectrum_noise = DataFrame()
        self.avg_noise = dict()
        # self.fetch_pu_info()

    def fetch_channel_info(self):
        # print "Table Name: ", self.table_name
        conditions = {'startfreq': ' > 800e6 AND "startfreq" < 1000e6'}
        self.channelInfo = self.dbReader.fetch_data(
            ['startfreq', 'occ', 'noise_floor'], self.table_name,
            conditions, 'ORDER BY startfreq')
        if self.channelInfo.size > 0:
            transposed_info = self.channelInfo.transpose()
            cols = None
            info = None
            noise_floor = None
            for row in transposed_info.itertuples():
                if (row[0] == 'startfreq'):
                    cols = list(row[1:-1])
                if (row[0] == 'occ'):
                    info = list(row[1:-1])
                if (row[0]) == 'noise_floor':
                    noise_floor = list(row[1:-1])
            # current_time = time.mktime(datetime.datetime.utcnow().timetuple())
            current_time = datetime.datetime.utcnow()
            ind = [current_time]
            temp = DataFrame(info, columns=ind, index=cols)
            temp = temp.transpose()
            temp_noise = DataFrame(noise_floor, columns=ind, index=cols)
            temp_noise = temp_noise.transpose()
            # print "Current Time: ", current_time
            # print temp_noise
            if self.spectrum_info.size == 0:
                self.spectrum_info = temp
                self.spectrum_noise = temp_noise
            else:
                try:
                    self.spectrum_info = self.spectrum_info.append(
                        temp, ignore_index=False)
                    self.spectrum_noise = self.spectrum_noise.append(
                        temp_noise, ignore_index=False)
                except Exception as e:
                    print "Error Appending:", e
            file_name = self.table_name + '.csv'
            if self.log_count == 0:
                temp.to_csv(file_name, mode='w', sep='\t')
                self.log_count = 1
            else:
                temp.to_csv(file_name, mode='a', header=False, sep='\t')
            spectrum_info_shape = self.spectrum_info.shape
            row_num = spectrum_info_shape[0]
            # print self.spectrum_info
            if (row_num > 3):
                # print self.spectrum_info
                ind_to_drop = self.spectrum_info.axes[0][0]
                self.spectrum_info = self.spectrum_info.drop(ind_to_drop)
                ind_to_drop = self.spectrum_noise.axes[0][0]
                self.spectrum_noise = self.spectrum_noise.drop(ind_to_drop)
            self.fetch_pu_info()

    def update_thresholds(self, nearest, near, furthest, startfreq):
        table_name = self.table_name + '_pu'
        input_data = {'nearest': nearest, 'near': near, 'furthest': furthest}
        self.dbReader.update_data(input_data, table_name,
                                  {'startfreq': startfreq})

    def fetch_pu_info(self):
        query = ('SELECT' + " pu_frequencies" + ',' +
                 'registered_cbsds."fccId"' + ', last_active, distance' +
                 ' FROM sensorcbsdconnection INNER JOIN registered_cbsds ON sensorcbsdconnection."fccId" = registered_cbsds."fccId" WHERE "nodeid" = ' +
                 str(self.sensor_id))
        self.potential_pu = self.dbReader.fetchQuery(query)
        self.potential_pu = self.potential_pu[
            self.potential_pu.last_active.notnull()]
        # self.potential_pu = self.potential_pu[self.potential_pu.pu_frequencies.notnull()]
        index_list = self.potential_pu.index.tolist()
        for i in index_list:
            fccId = self.potential_pu.loc[i]['fccId']
            if fccId not in self.cbsd_pu:
                self.cbsd_pu[fccId] = CBSD(
                    fccId, self.potential_pu.loc[i]['distance'],
                    self.dbReader, self.sensor_id)
            self.cbsd_pu[fccId].setLastActive(
                np.asscalar(self.potential_pu.loc[i]['last_active']))
            self.cbsd_pu[fccId].set_distance(
                self.potential_pu.loc[i]['distance'])
            # print np.asscalar(self.potential_pu.loc[i]['last_active'])
            freq_list = np.asarray(self.potential_pu.loc[i]['pu_frequencies'])
            freq_list = freq_list.tolist()
            self.cbsd_pu[fccId].addPotentialPuFrequencies(freq_list)

    def updatePerceivedPU(self, key, value):
        check = self.spectrum_info.to_dict()
        check2 = self.calculated_info.to_dict()
        if key in self.potential:
            # print key, " : ", check[key]
            # print "Chosen: ", check2[key]
            pass
        if value > 200 and key in self.potential:
            print key, " lost a potential -------------- dist ", value, " at ", self.sensor_id, " psd = ", self.calc_psd[key]
            # print check[key]
            self.potential.pop(key, None)
            self.distUpdate = True
        elif key not in self.potential and value < 200:
            self.potential[key] = value
            # print check[key]
            print key, " gained a potential ++++++++++++++ dist ", value, " at ", self.sensor_id, " psd = ", self.calc_psd[key]
            self.distUpdate = True
        elif key in self.potential and self.potential[key] != value:
            self.potential[key] = value
            # print check[key]
            print key, " Updated Distance: ", value, " at ", self.sensor_id, " psd = ", self.calc_psd[key]
            self.distUpdate = True

    def averageNoise(self):
        noise_dict = self.spectrum_noise.to_dict()
        if len(noise_dict) > 0:
            for freq, value in noise_dict.iteritems():
                if len(noise_dict[freq]) > 0:
                    self.avg_noise[freq] = 0
                    for time_stamp, noise_dbm in noise_dict[freq].iteritems():
                        self.avg_noise[freq] = self.avg_noise[freq] + self.dbm_to_mw(noise_dbm)
                    self.avg_noise[freq] = self.avg_noise[freq] / len(noise_dict[freq])
                    self.avg_noise[freq] = self.mw_to_dbm(self.avg_noise[freq])
        return self.avg_noise

    def mw_to_dbm(self, mW):
        return 10. * math.log10(mW)

    def dbm_to_mw(self, dBm):
        return 10**((dBm) / 10.)

    def test(self):
        pass
def main():
    train_set = create_dataset(N)
    test_set = create_dataset(N)
    df_ws = DataFrame()

    # Compute and plot the polynomial approximations
    fig = plt.figure()
    for c, m in enumerate(M):
        f, ws = lsm_resolve(train_set, m)
        df_ws = df_ws.append(Series(ws, name="M = %d" % m))

        subplot = fig.add_subplot(2, 2, c + 1)
        subplot.set_xlim(-0.05, 1.05)
        subplot.set_ylim(-1.5, 1.5)
        subplot.set_title("M = %d" % m)

        # Plot the training set
        subplot.scatter(train_set.x, train_set.y, marker='o',
                        color='blue', label=None)

        # Plot the true curve
        linex = np.linspace(0, 1, 101)
        liney = np.sin(2 * np.pi * linex)
        subplot.plot(linex, liney, color='green', linestyle='--')

        # Plot the polynomial approximation
        linex = np.linspace(0, 1, 101)
        liney = f(linex)
        label = "E(RMS)=%.2f" % rms_error(train_set, f)
        subplot.plot(linex, liney, color='red', label=label)
        subplot.legend(loc=1)

    # Print the coefficient values
    # p.69 Fig 2.3; for N = 100: p.80 Fig 2.11
    print("Table of the coefficients")
    print(df_ws.transpose())
    # For N = 100: p.80 Fig 2.12 (not much different from Fig 2.2)
    # fig.savefig("out/021-p68_fig2.2.png")

    # Plot how the error changes on the training set and the test set
    df = DataFrame(columns=['Training set', 'Test set'])
    # Repeat for each polynomial degree
    for m in range(0, 10):
        f, ws = lsm_resolve(train_set, m)
        train_error = rms_error(train_set, f)
        test_error = rms_error(test_set, f)
        df = df.append(Series([train_error, test_error],
                              index=['Training set', 'Test set']),
                       ignore_index=True)

    # p.77 Fig 2.8
    # Around degree M >= 3, the error on the test set stops improving
    # (it levels off at about 0.3).
    # On the training set with N = 10, the fit is exact at M = 9 (no surprise,
    # since we score the fitted model on the very data it was trained on).
    # -> Overfitting: the result is specialized to the training set and
    #    does not generalize.
    df.plot(title='RMS Error', style=['-', '--'], grid=True, ylim=(0, 0.9))
    # plt.savefig("out/021-p77_fig2.8.png")
    plt.show()
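# Compatibility note for the two tutorial scripts above (an assumption about
# the reader's environment, not part of the original code): DataFrame.append
# was deprecated in pandas 1.4 and removed in 2.0. Under modern pandas the
# accumulation loop can collect plain dicts and build the frame once;
# dummy_error below is a hypothetical stand-in for rms_error(train_set, f).
import pandas as pd

def dummy_error(m):
    return 1.0 / (m + 1)  # placeholder for the real RMS error computation

rows = [{'Training set': dummy_error(m), 'Test set': dummy_error(m) + 0.1}
        for m in range(0, 10)]
df = pd.DataFrame(rows, columns=['Training set', 'Test set'])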
def memory_perf(system_list, unique_id, group_number, detail_options,
                rampup_value=0, current_dir=""):
    have_memory_data = False
    modes = ['1K', '4K', '1M', '16M', '128M', '256M', '1G', '2G']
    sets = search_item(system_list, unique_id, "cpu", "(.*)", [], modes)
    for mode in sorted(modes):
        real_mode = "Memory benchmark %s" % mode
        results = {}
        threaded_perf = dict()
        forked_perf = dict()
        for system in sets:
            memory = []
            series = []
            found_data = ""
            threaded_perf[system] = 0
            forked_perf[system] = 0
            for perf in sets[system]:
                if mode in perf[2]:
                    # We shall split individual cpu benchmarking from
                    # the global one
                    if ("logical_" in perf[1] and
                            ("bandwidth_%s" % mode) in perf[2]):
                        if not perf[1] in memory:
                            memory.append(perf[1])
                        series.append(float(perf[3]))
                    elif "threaded_bandwidth_%s" % mode in perf[2]:
                        threaded_perf[system] = float(perf[3])
                        found_data = float(perf[3])
                    elif "forked_bandwidth_%s" % mode in perf[2]:
                        forked_perf[system] = float(perf[3])
                        found_data = float(perf[3])
            if found_data:
                # If no series are populated, it means that a single
                # "All CPU" run was done
                # If so, let's create a single run value
                if not series:
                    series.append(found_data)
                    memory.append("logical")
                results[system] = Series(series, index=memory)

        # No need to continue if no Memory data in this benchmark
        if not results:
            continue

        consistent = []
        curious = []
        unstable = []
        details = []
        matched_category = ''

        df = DataFrame(results)
        for memory in df.transpose().columns:
            if have_memory_data is False:
                print()
                print("Group %d : Checking Memory perf" % group_number)
                have_memory_data = True
            print_perf(1, 7, df.transpose()[memory], df, real_mode, memory,
                       consistent, curious, unstable, "", rampup_value,
                       current_dir)
            matched_category = []
            prepare_detail(detail_options, group_number, mode, memory,
                           details, matched_category)
        print_detail(detail_options, details, df, matched_category)
        print_summary(mode, consistent, "consistent", "MB/s", df)
        print_summary(mode, curious, "curious", "MB/s", df)
        print_summary(mode, unstable, "unstable", "MB/s", df)

        for bench_type in ["threaded", "forked"]:
            efficiency = {}
            have_forked_or_threaded = False
            if "threaded" in bench_type:
                mode_text = "Thread effi."
            else:
                mode_text = "Forked Effi."
            for system in sets:
                host_efficiency_full_load = []
                host_perf = df[system].sum()
                if (host_perf > 0 and threaded_perf[system] > 0 and
                        forked_perf[system] > 0):
                    have_forked_or_threaded = True
                    if "threaded" in bench_type:
                        host_efficiency_full_load.append(
                            threaded_perf[system] / host_perf * 100)
                    else:
                        host_efficiency_full_load.append(
                            forked_perf[system] / host_perf * 100)
                    efficiency[system] = Series(host_efficiency_full_load,
                                                index=[mode_text])

            details = []
            memory_eff = DataFrame(efficiency)
            if have_forked_or_threaded is True:
                consistent = []
                curious = []
                unstable = []
                for memory in memory_eff.transpose().columns:
                    print_perf(2, 10, memory_eff.transpose()[memory],
                               memory_eff, real_mode, memory, consistent,
                               curious, unstable)
                    matched_category = []
                    prepare_detail(detail_options, group_number, mode,
                                   memory, details, matched_category)
                    # Let's pad if its a thread or forked effi in addition
                    # of the block size
                    if matched_category:
                        matched_category[0] += " " + mode_text
                print_detail(detail_options, details, memory_eff,
                             matched_category)
                print_summary(mode + " " + mode_text, consistent,
                              "consistent", "%", memory_eff)
                print_summary(mode + " " + mode_text, curious, "curious",
                              "%", memory_eff)
                print_summary(mode + " " + mode_text, unstable, "unstable",
                              "%", memory_eff)
            else:
                utils.do_print(real_mode, utils.Levels.WARNING,
                               "%-12s : Benchmark not run on this group",
                               mode_text)
def cpu_perf(system_list, unique_id, group_number, detail_options,
             rampup_value=0, current_dir=""):
    have_cpu_data = False
    host_cpu_list = search_item(system_list, unique_id, "cpu", "(.*)", [],
                                ['product'])
    host_cpu_number = search_item(system_list, unique_id, "cpu",
                                  "(.*logical.*)", [], ['number'])
    core_counts = 1
    for host in host_cpu_number:
        for item in host_cpu_number[host]:
            core_counts = item[3]
            break
    cpu_type = ''
    for host in host_cpu_list:
        for item in host_cpu_list[host]:
            cpu_type = item[3]
            break
    modes = ['bogomips', 'loops_per_sec']
    sets = search_item(system_list, unique_id, "cpu", "(.*)", [], modes)
    global_perf = dict()
    for mode in sorted(modes):
        results = {}
        for system in sets:
            cpu = []
            series = []
            found_data = False
            for perf in sets[system]:
                if perf[2] == mode:
                    # We shall split individual cpu benchmarking from
                    # the global one
                    if "_" in perf[1]:
                        if not perf[1] in cpu:
                            cpu.append(perf[1])
                        series.append(float(perf[3]))
                        found_data = True
                    elif "loops_per_sec" in mode:
                        global_perf[system] = float(perf[3])
                        found_data = True
            if found_data is True:
                # If no series are populated, it means that a single
                # "All CPU" run was done
                # If so, let's create a single run value
                if not series:
                    series.append(global_perf[system])
                    cpu.append("logical")
                results[system] = Series(series, index=cpu)

        # No need to continue if no CPU data in this benchmark
        if not results:
            continue

        df = DataFrame(results)
        consistent = []
        curious = []
        unstable = []
        details = []
        matched_category = []
        for cpu in df.transpose().columns:
            if have_cpu_data is False:
                print()
                print("Group %d : Checking CPU perf" % group_number)
                have_cpu_data = True
            print_perf(2, 7, df.transpose()[cpu], df, mode, cpu, consistent,
                       curious, unstable, "", rampup_value, current_dir)
            prepare_detail(detail_options, group_number, mode, cpu, details,
                           matched_category)
        print_detail(detail_options, details, df, matched_category)
        print_summary(mode, consistent, "consistent", "", df, cpu_type)
        print_summary(mode, curious, "curious", "", df)
        print_summary(mode, unstable, "unstable", "", df)

        if mode == "loops_per_sec":
            efficiency = {}
            mode_text = 'CPU Effi.'
            consistent = []
            curious = []
            unstable = []
            details = []
            matched_category = []
            for system in sets:
                host_efficiency_full_load = []
                host_perf = (df[system].sum() *
                             (int(core_counts) / df[system].count()))
                host_efficiency_full_load.append(global_perf[system] /
                                                 host_perf * 100)
                efficiency[system] = Series(host_efficiency_full_load,
                                            index=[mode_text])
            cpu_eff = DataFrame(efficiency)
            print_perf(1, 2, cpu_eff.transpose()[mode_text], cpu_eff, mode,
                       mode_text, consistent, curious, unstable)
            prepare_detail(detail_options, group_number, mode, mode_text,
                           details, matched_category)
            print_detail(detail_options, details, cpu_eff, matched_category)
            print_summary("CPU Efficiency", consistent, "consistent", '%',
                          cpu_eff)
            print_summary("CPU Efficiency", curious, "curious", '%', cpu_eff)
            print_summary("CPU Efficiency", unstable, "unstable", '%',
                          cpu_eff)
def logical_disks_perf(system_list, unique_id, group_number, detail_options,
                       perf_unit, rampup_value=0, current_dir=""):
    have_disk_data = False
    sets = search_item(system_list, unique_id, "disk", r"[a-z]d(\S+)", [],
                       ['simultaneous', 'standalone'])
    modes = []
    # Searching for modes ran in this benchmark
    for system in sets:
        for perf in sets[system]:
            if perf[2] not in modes and perf_unit in perf[2]:
                modes.append(perf[2])
    # Nothing to report if no mode matched this benchmark
    if not modes:
        return

    for mode in sorted(modes):
        results = {}
        for system in sets:
            disks = []
            series = []
            for perf in sets[system]:
                if perf[2] == mode:
                    if not perf[1] in disks:
                        disks.append(perf[1])
                    series.append(int(perf[3]))
            results[system] = Series(series, index=disks)

        df = DataFrame(results)
        details = []
        matched_category = []
        for disk in df.transpose().columns:
            if have_disk_data is False:
                print()
                print("Group %d : Checking logical disks perf" % group_number)
                have_disk_data = True
            consistent = []
            curious = []
            unstable = []
            # How much the variance could be far from the average (in %)
            tolerance_max = 10
            tolerance_min = 2
            # In random mode, the variance could be higher as
            # we cannot insure the distribution pattern was similar
            if "rand" in mode:
                tolerance_min = 5
                tolerance_max = 15
            print_perf(tolerance_min, tolerance_max, df.transpose()[disk],
                       df, mode, disk, consistent, curious, unstable,
                       "-%s" % perf_unit, rampup_value, current_dir)
            prepare_detail(detail_options, group_number, mode, disk,
                           details, matched_category)
            print_summary("%-30s %s" % (mode, disk), consistent,
                          "consistent", perf_unit, df)
            print_summary("%-30s %s" % (mode, disk), curious, "curious",
                          perf_unit, df)
            print_summary("%-30s %s" % (mode, disk), unstable, "unstable",
                          perf_unit, df)
        print_detail(detail_options, details, df, matched_category)
def dataframe_transpose(df: pd.DataFrame):
    return df.transpose().reset_index()
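# Example use of the helper above (the sample data is assumed): the old index
# comes back as an ordinary "index" column after the transpose.
import pandas as pd

df = pd.DataFrame({"x": [1, 2]}, index=["a", "b"])
print(dataframe_transpose(df))
#   index  a  b
# 0     x  1  2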
            # print col_ind, " : col", dev_name, u'already exists, please check the source table data!'
            sys.exit()
        else:
            continue

minn = min(map(len, ser)) - 1  # serr = ser[:]
ddd = [ser[1][:minn][::-1], ser[2][:minn][::-1]]
# file2 = os.path.join(os.getcwd(), 'Data_out.xlsx')
# print '\nData extracted successfully! Now writing to new file...\n'
oltdata = DataFrame(devs_Paravalue.values(), index=name)
oltdata2 = oltdata.transpose()
oltpara = DataFrame(ddd, index=['compareTypeInfo', 'compareItem'])
oltpara2 = oltpara.transpose()
oltall = oltpara2.join(oltdata2, how='outer')
oltall2 = oltall.drop_duplicates()
excelname = os.path.join(outdir, "Data_out.xlsx")
setColWidth(excelname, '比较信息', oltall2.shape[1], 200)
if not os.path.exists(os.path.join(os.getcwd(), excelname)):
    setColWidth(excelname, '比较信息', oltall2.shape[1], 200)
else:
    ss = ExcelWriter(excelname)
pattern = r'^(.*(?:[^\W\d_]*))\n(.+)$'
reg = compile(pattern, re.M)
# print Dgood.describe()
print ' '
# print Dbad.describe()
print ' '

Dx = DataFrame()
for k in keys_list:
    kgood = str(k + ' good')
    kbad = str(k + ' bad')
    gD = Dgood[k]
    bD = Dbad[k]
    print k, '\nGood: ', gD.describe(), '\n Bad:', bD.describe(), '\n\n'
    gD = np.array(Dgood[k]).reshape(-1,)
    bD = np.array(Dbad[k]).reshape(-1)
    Dx = DataFrame([gD, bD])
    # transpose so each distribution becomes a column, then label and plot it
    DD = Dx.transpose()
    DD.columns = [kgood, kbad]
    DD.plot.hist()
    plt.show()

# print Dgood
print ' '
# print Dbad
Dgood.hist(grid=True, bins=1000)
plt.title(" Good TODs")
def logical_disks_perf(systems, unique_id, group_number, detail_options,
                       perf_unit, rampup_value=0, current_dir=""):
    have_disk_data = False
    sets = search_item(systems, unique_id, "disk", r"[a-z]d(\S+)", [],
                       ['simultaneous', 'standalone'])
    modes = []
    # Searching for modes ran in this benchmark
    for system in sets:
        for perf in sets[system]:
            if perf[2] not in modes and perf_unit in perf[2]:
                modes.append(perf[2])
    if len(modes) == 0:
        return

    for mode in sorted(modes):
        results = {}
        for system in sets:
            disks = []
            series = []
            for perf in sets[system]:
                if perf[2] == mode:
                    if not perf[1] in disks:
                        disks.append(perf[1])
                    series.append(int(perf[3]))
            results[system] = Series(series, index=disks)

        df = DataFrame(results)
        details = []
        matched_category = []
        for disk in df.transpose().columns:
            if have_disk_data is False:
                print()
                print("Group %d : Checking logical disks perf" % group_number)
                have_disk_data = True
            consistent = []
            curious = []
            unstable = []
            # How much the variance could be far from the average (in %)
            tolerance_max = 10
            tolerance_min = 2
            # In random mode, the variance could be higher as
            # we cannot insure the distribution pattern was similar
            if "rand" in mode:
                tolerance_min = 5
                tolerance_max = 15
            print_perf(tolerance_min, tolerance_max, df.transpose()[disk],
                       df, mode, disk, consistent, curious, unstable,
                       "-%s" % perf_unit, rampup_value, current_dir)
            prepare_detail(detail_options, group_number, mode, disk,
                           details, matched_category)
            print_summary("%-30s %s" % (mode, disk), consistent,
                          "consistent", perf_unit, df)
            print_summary("%-30s %s" % (mode, disk), curious, "curious",
                          perf_unit, df)
            print_summary("%-30s %s" % (mode, disk), unstable, "unstable",
                          perf_unit, df)
        print_detail(detail_options, details, df, matched_category)
def test_frame_from_json_to_json(self): def _check_orient(df, orient, dtype=None, numpy=True): df = df.sort() dfjson = df.to_json(orient=orient) unser = DataFrame.from_json(dfjson, orient=orient, dtype=dtype, numpy=numpy) unser = unser.sort() if df.index.dtype.type == np.datetime64: unser.index = DatetimeIndex(unser.index.values.astype('i8')) if orient == "records": # index is not captured in this orientation assert_almost_equal(df.values, unser.values) self.assert_(df.columns.equals(unser.columns)) elif orient == "values": # index and cols are not captured in this orientation assert_almost_equal(df.values, unser.values) elif orient == "split": # index and col labels might not be strings unser.index = [str(i) for i in unser.index] unser.columns = [str(i) for i in unser.columns] unser = unser.sort() assert_almost_equal(df.values, unser.values) else: assert_frame_equal(df, unser) def _check_all_orients(df, dtype=None): _check_orient(df, "columns", dtype=dtype) _check_orient(df, "records", dtype=dtype) _check_orient(df, "split", dtype=dtype) _check_orient(df, "index", dtype=dtype) _check_orient(df, "values", dtype=dtype) _check_orient(df, "columns", dtype=dtype, numpy=False) _check_orient(df, "records", dtype=dtype, numpy=False) _check_orient(df, "split", dtype=dtype, numpy=False) _check_orient(df, "index", dtype=dtype, numpy=False) _check_orient(df, "values", dtype=dtype, numpy=False) # basic _check_all_orients(self.frame) self.assertEqual(self.frame.to_json(), self.frame.to_json(orient="columns")) _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) # big one # index and columns are strings as all unserialised JSON object keys # are assumed to be strings biggie = DataFrame(np.zeros((200, 4)), columns=[str(i) for i in range(4)], index=[str(i) for i in range(200)]) _check_all_orients(biggie) # dtypes _check_all_orients(DataFrame(biggie, dtype=np.float64), dtype=np.float64) _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int) _check_all_orients(DataFrame(biggie, dtype='<U3'), dtype='<U3') # empty _check_all_orients(self.empty_frame) # time series data _check_all_orients(self.tsframe) # mixed data index = pd.Index(['a', 'b', 'c', 'd', 'e']) data = { 'A': [0., 1., 2., 3., 4.], 'B': [0., 1., 0., 1., 0.], 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], 'D': [True, False, True, False, True] } df = DataFrame(data=data, index=index) _check_orient(df, "split") _check_orient(df, "records") _check_orient(df, "values") _check_orient(df, "columns") # index oriented is problematic as it is read back in in a transposed # state, so the columns are interpreted as having mixed data and # given object dtypes. # force everything to have object dtype beforehand _check_orient(df.transpose().transpose(), "index")
def memory_perf(systems, unique_id, group_number, detail_options, rampup_value=0, current_dir=""): have_memory_data = False modes = ['1K', '4K', '1M', '16M', '128M', '256M', '1G', '2G'] sets = search_item(systems, unique_id, "cpu", "(.*)", [], modes) for mode in sorted(modes): real_mode = "Memory benchmark %s" % mode results = {} threaded_perf = dict() forked_perf = dict() for system in sets: memory = [] series = [] found_data = "" threaded_perf[system] = 0 forked_perf[system] = 0 for perf in sets[system]: if mode in perf[2]: # We shall split individual cpu benchmarking from # the global one if ("logical_" in perf[1] and ("bandwidth_%s" % mode) in perf[2]): if not perf[1] in memory: memory.append(perf[1]) series.append(float(perf[3])) elif "threaded_bandwidth_%s" % mode in perf[2]: threaded_perf[system] = float(perf[3]) found_data = float(perf[3]) elif "forked_bandwidth_%s" % mode in perf[2]: forked_perf[system] = float(perf[3]) found_data = float(perf[3]) if found_data: # If no series are populated, it means that a single "All CPU" # run was done # If so, let's create a single run value if not series: series.append(found_data) memory.append("logical") results[system] = Series(series, index=memory) # No need to continue if no Memory data in this benchmark if not results: continue consistent = [] curious = [] unstable = [] details = [] matched_category = '' df = DataFrame(results) for memory in df.transpose().columns: if have_memory_data is False: print() print("Group %d : Checking Memory perf" % group_number) have_memory_data = True print_perf(1, 7, df.transpose()[memory], df, real_mode, memory, consistent, curious, unstable, "", rampup_value, current_dir) matched_category = [] prepare_detail(detail_options, group_number, mode, memory, details, matched_category) print_detail(detail_options, details, df, matched_category) print_summary(mode, consistent, "consistent", "MB/s", df) print_summary(mode, curious, "curious", "MB/s", df) print_summary(mode, unstable, "unstable", "MB/s", df) for bench_type in ["threaded", "forked"]: efficiency = {} have_forked_or_threaded = False if "threaded" in bench_type: mode_text = "Thread effi." else: mode_text = "Forked Effi." 
for system in sets: host_efficiency_full_load = [] host_perf = df[system].sum() if (host_perf > 0 and threaded_perf[system] > 0 and forked_perf[system] > 0): have_forked_or_threaded = True if "threaded" in bench_type: host_efficiency_full_load.append( threaded_perf[system] / host_perf * 100) else: host_efficiency_full_load.append( forked_perf[system] / host_perf * 100) efficiency[system] = Series(host_efficiency_full_load, index=[mode_text]) details = [] memory_eff = DataFrame(efficiency) if have_forked_or_threaded is True: consistent = [] curious = [] unstable = [] for memory in memory_eff.transpose().columns: print_perf(2, 10, memory_eff.transpose()[memory], memory_eff, real_mode, memory, consistent, curious, unstable) matched_category = [] prepare_detail(detail_options, group_number, mode, memory, details, matched_category) # Let's pad if it's a thread or forked efficiency, # in addition to the block size if matched_category: matched_category[0] += " " + mode_text print_detail(detail_options, details, memory_eff, matched_category) print_summary(mode + " " + mode_text, consistent, "consistent", "%", memory_eff) print_summary(mode + " " + mode_text, curious, "curious", "%", memory_eff) print_summary(mode + " " + mode_text, unstable, "unstable", "%", memory_eff) else: utils.do_print(real_mode, utils.Levels.WARNING, "%-12s : Benchmark not run on this group", mode_text)
def speichern(file, name): df = DataFrame(file) df = df.transpose() # transpose() returns a new frame; without reassignment the call is a no-op writer = pd.ExcelWriter("rsc/" + name + ".xlsx", engine="xlsxwriter") df.to_excel(writer, sheet_name="minVerbrauch", header=False, index=False) writer.save()
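# The reassignment in speichern() matters because pandas transpose() is not an
# in-place operation; a minimal sketch demonstrating the pitfall (made-up data):
from pandas import DataFrame

df = DataFrame({"a": [1, 2]})
df.transpose()               # returns a new frame; df itself is unchanged
assert df.shape == (2, 1)
df = df.transpose()          # reassign to actually keep the transposed frame
assert df.shape == (1, 2)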
def zonal_stats(vector_path, raster_path, class_attr=None, ret_vals=True, ret_stats=True, nodata_value=None, global_src_extent=False): rds = gdal.Open(raster_path, GA_ReadOnly) assert (rds) rb = rds.GetRasterBand(1) rgt = rds.GetGeoTransform() if nodata_value: nodata_value = float(nodata_value) rb.SetNoDataValue(nodata_value) vds = ogr.Open( vector_path, GA_ReadOnly) # TODO maybe open update if we want to write stats assert (vds) vlyr = vds.GetLayer(0) # create an in-memory numpy array of the source raster data # covering the whole extent of the vector layer if global_src_extent: # use global source extent # useful only when disk IO or raster scanning inefficiencies are your limiting factor # advantage: reads raster data in one pass # disadvantage: large vector extents may have big memory requirements src_offset = bbox_to_pixel_offsets(rgt, vlyr.GetExtent()) # src_array = rb.ReadAsArray(*src_offset) src_array = rds.ReadAsArray(*src_offset) # calculate new geotransform of the layer subset new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0, (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5]) mem_drv = ogr.GetDriverByName('Memory') driver = gdal.GetDriverByName('MEM') # Loop through vectors stats = [] cols = ['b' + str(i + 1) for i in range(rds.RasterCount)] vals = DataFrame(columns=cols) feat = vlyr.GetNextFeature() while feat is not None: if not global_src_extent: # use local source extent # fastest option when you have fast disks and well indexed raster (ie tiled Geotiff) # advantage: each feature uses the smallest raster chunk # disadvantage: lots of reads on the source raster src_offset = bbox_to_pixel_offsets(rgt, feat.geometry().GetEnvelope()) # src_array = rb.ReadAsArray(*src_offset) src_array = rds.ReadAsArray(*src_offset) # calculate new geotransform of the feature subset new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0, (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5]) # Create a temporary vector layer in memory mem_ds = mem_drv.CreateDataSource('out') mem_layer = mem_ds.CreateLayer('poly', None, ogr.wkbPolygon) mem_layer.CreateFeature(feat.Clone()) # Rasterize it rvds = driver.Create('', src_offset[2], src_offset[3], 1, gdal.GDT_Byte) rvds.SetGeoTransform(new_gt) gdal.RasterizeLayer(rvds, [1], mem_layer, burn_values=[1]) rv_array = rvds.ReadAsArray() # Mask the source data array with our current feature # we take the logical_not to flip 0<->1 to get the correct mask effect # we also mask out nodata values explicitly masked = np.ma.MaskedArray(src_array, mask=np.tile( np.logical_or(src_array == nodata_value, np.logical_not(rv_array)), (rds.RasterCount, 1, 1))) class_val = None # ensure the name is defined even if the field lookup below fails try: class_val = feat.GetFieldAsString(class_attr) except: pass #import ipdb; ipdb.set_trace() if ret_vals: stacked = masked.reshape(rds.RasterCount, masked[0, :, :].size) stacked = DataFrame(columns=cols, data=stacked.transpose()).dropna() if class_val is not None: stacked = stacked.set_index( np.repeat(class_val, stacked.shape[0])) vals = vals.append(stacked) if ret_stats: feature_stats = { 'min': float(masked.min()), 'mean': float(masked.mean()), 'max': float(masked.max()), 'std': float(masked.std()), 'sum': float(masked.sum()), 'count': int(masked.count()), 'fid': int(feat.GetFID()) } stats.append(feature_stats) rvds = None mem_ds = None feat = vlyr.GetNextFeature() vds = None rds = None return (vals, stats)
if not wordtag[0] in stopwords_split and not wordtag[0] in punctuation: if not wordtag[1] in tag_to_int: tag_to_int[wordtag[1]] = tag_number tag_number += 1 indx = index + 1 term_ferquency.append(tf[wordtag[0]]) inverse_doc_frequency.append(idf[wordtag[0]]) positions.append(position(indx, title_len, context_len)) in_title.append(is_in_l(wordtag[0], title)) in_context.append(is_in_l(wordtag[0], context)) in_label.append(is_in_l(wordtag[0], tag)) pos_tag.append(tag_to_int[wordtag[1]]) table = [ term_ferquency, inverse_doc_frequency, positions, in_title, in_context, pos_tag, in_label ] df = DataFrame(table) df = df.transpose() cols = [ 'term_ferquency', 'inverse_doc_frequency', 'positions', 'in_title', 'in_context', 'pos_tag', 'in_label' ] df.columns = cols df.to_csv(f, mode='a', index=False, header=False) print 'Data built : {}/6 ; Operation time : {:04.2f} minute'.format( i + 1, (time.time() - starttime) / 60) f.close() print 'Training data saved : training_data.csv'
def read_lmw(admFile, datFile, kwaFile): with open(admFile) as f: administration = f.readlines() with open(datFile) as f: data = f.readlines() with open(kwaFile) as f: data_quality = f.readlines() if len(administration) != len(data): raise Exception("Input data is not of the same length.") # LMW interval in minutes interval = 10 val_series = [] timestamp_series = [] remoteid_series = [] quality_series = [] zom_win = [] for i in range(len(administration)): values = administration[i].split(",") # Get the id of the time series timeseriesId = values[0].strip() +\ "_" + values[1].strip() + "_" + values[3].strip() # Get the time of the first value if values[7].find('MET') == -1: zom_win = 'summer' values[7] = values[7].replace("JAN", "01") values[7] = values[7].replace("FEB", "02") values[7] = values[7].replace("MRT", "03") values[7] = values[7].replace("APR", "04") values[7] = values[7].replace("MEI", "05") values[7] = values[7].replace("JUN", "06") values[7] = values[7].replace("JUL", "07") values[7] = values[7].replace("AUG", "08") values[7] = values[7].replace("SEP", "09") values[7] = values[7].replace("OKT", "10") values[7] = values[7].replace("NOV", "11") values[7] = values[7].replace("DEC", "12") values[7] = values[7].replace("Z03", "") values[7] = values[7].replace("MET", "") values[7] = values[7].strip() if zom_win == 'summer': timeFirstValue = datetime.strptime(values[7], "%d-%m-%y %H:%M") -\ timedelta(0, 0, 0, 0, 120) else: timeFirstValue = datetime.strptime(values[7], "%d-%m-%y %H:%M") -\ timedelta(0, 0, 0, 0, 60) # Get all the measurements measurements = data[i].split(",") quality = data_quality[i].split(",") if len(measurements) != 7: raise Exception("Invalid number of measurements for time series.") if len(quality) != 7: raise Exception("Invalid number of quality flags for time series.") counter = 0 for j in range(6): value = measurements[j].strip() value_flag = int(quality[j]) if value != "f" and value != "n": TimeForValue = timeFirstValue +\ timedelta(0, 0, 0, 0, interval * j) val_series.append(float(value)) timestamp_series.append(TimeForValue) remoteid_series.append(timeseriesId) counter += 1 if value_flag in [10, 30, 50, 70]: quality_series.append('0') elif value_flag in [2, 22, 24, 28, 42, 44, 48, 62, 68]: quality_series.append('3') else: quality_series.append('6') tsobj = DataFrame([remoteid_series, val_series, quality_series]) tsobj = tsobj.transpose() tsobj.columns = ['SensorID', 'value', 'flag'] tstamp = DataFrame(timestamp_series, columns=['ts']) tsobj_indexed = tsobj.set_index(tstamp['ts']) return tsobj_indexed
def normalize_df(target: DataFrame, normer: DataFrame, ind_sep: Optional[str] = "-", alphas: Optional[Iterable[float]] = None, cv: int = 5, **RidgeCV_kws) -> DataFrame: """ Used to normalize a dataset by another dataset, using a linear model with regularization chosen through cross validation (aka sklearn's RidgeCV). This is useful for normalizing, for example, RNA values by CNA, or phosphopeptide values by protein abundance. If target and normer dataframe row IDs (index) match 1:1, pass None for ind_sep. Args: target: Dataframe of values to normalize. Row IDs (index) before the sep (or whole ID if no sep) must match normer IDs. Row IDs must be unique. normer: Dataframe of values to use for normalization. Row IDs must match all or pre-ind_sep portions of target row IDs. Row IDs must be unique. ind_sep: If multiple rows in target map to 1 row in normer, the delimiter used to split the unique ID that matches the normer IDs. Default "-" alphas: Parameters to try for regularization. If None, tries powers of 2 from -10 to 10. cv: Folds for cross validation. Also the minimum number of non-null values for each row. Default 5 **RidgeCV_kws: kws to pass to sklearn's RidgeCV Returns: normed The target dataframe normalized by the normer dataframe. Only includes rows with sufficient non-null values from both dataframes. """ if not alphas: alphas = [2**i for i in range(-10, 10, 1)] normer = normer[[col for col in target.columns if col in normer.columns]] target = target[normer.columns] if (len(normer.columns) < cv) or (len(target.columns) < cv): raise KeyError( "target and normer dataframes do not have at least %s columns in common" % cv) target = target.transpose() target["col0"] = 0 target.set_index("col0", append=True, inplace=True) target = target.reorder_levels( [target.index.names[-1], target.index.names[0]]).transpose() normer = normer.transpose() normer["col0"] = 1 normer.set_index("col0", append=True, inplace=True) normer = normer.reorder_levels( [normer.index.names[-1], normer.index.names[0]]).transpose() target["gene"] = [i.split(ind_sep)[0] for i in target.index] target = target.loc[target["gene"].isin(normer.index), :] if len(target) == 0: raise KeyError("No rows in common between target and normer") logging.info( "Normalizing %s common rows and %s common samples between target and normer" % (len(target), len(normer.columns))) data = target.merge(normer, how="left", left_on="gene", right_index=True) model = lm.RidgeCV(alphas=alphas, cv=cv, **RidgeCV_kws) normed = data.apply( (lambda row: _convert_to_residuals(row[0], row[1], model)), axis=1) return normed
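# A hypothetical usage sketch for normalize_df() above (made-up frames, not
# from the source; assumes the module's _convert_to_residuals helper is
# importable alongside it): phosphosite rows such as "TP53-S15" are matched to
# protein rows "TP53" via ind_sep="-", and at least cv (default 5) sample
# columns must be shared between the two frames.
import numpy as np
from pandas import DataFrame

rng = np.random.default_rng(0)
samples = ["s%d" % i for i in range(6)]            # 6 shared samples >= cv
protein = DataFrame(rng.normal(size=(2, 6)),
                    index=["TP53", "EGFR"], columns=samples)
phospho = DataFrame(rng.normal(size=(4, 6)),
                    index=["TP53-S15", "TP53-S20", "EGFR-Y1068", "EGFR-S695"],
                    columns=samples)
residuals = normalize_df(phospho, protein)         # one residual series per phosphosite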
def ComputeMetrics1(stats, filename): """ DESCRIPTION :Parameters: NAME : TYPE DESCRIPTION :Return: DESCRIPTION """ data = {} for article in stats: metrics = {} temp = {} title = article['article-title'] # get metrics from data allActions = GetMetric(article, 'total-actions') number_tokens = GetMetric(article, 'number-tokens') maintenanceTag = GetMetric(article, 'tag-maintained') # split metrics between maintainer and others addsMaintainer, addsOthers = SplitMO(article, maintainers[index], 'tokens-added') deletesMaintainer, deletesOthers = SplitMO(article, maintainers[index], 'tokens-deleted') revertsMaintainer, revertsOthers = SplitMO(article, maintainers[index], 'tokens-reverted') antActionsMaintainer, antActionsOthers = SplitMO( article, maintainers[index], 'antagonistic-actions') reintroMaintainer, reintroOthers = SplitMO(article, maintainers[index], 'tokens-reintroduced') selfreintroMaintainer, selfreintroOthers = SplitMO( article, maintainers[index], 'tokens-self-reintroduced') talkpageMaintainer, talkpageOthers = SplitMO(article, maintainers[index], 'talkpage-edits') ownershipMaintainerAbs = GetOwnership(article, maintainers_id[index], 'tokens-absolute') ownershipMaintainerRel = GetOwnership(article, maintainers_id[index], 'tokens-relative') # get properties of article metrics['firstMaintRev'] = GetFirstMaintainedRev(maintenanceTag) metrics['maintainer-name'] = article['maintainer-name'] metrics['maintainer-id'] = article['maintainer-id'] metrics['all-actions'] = sum(allActions) metrics['edits-maintainer'] = len(addsMaintainer) metrics['edits-others'] = len(addsOthers) metrics['number-revisions'] = metrics['edits-maintainer'] + metrics[ 'edits-others'] # temporal comparison TempCompare() # relativizing by edits is just an assumption, to have something.
if talkpageOthers: #metrics['talkPageRatio'] = sum(talkpageMaintainer) / float(metrics['edits-maintainer']) / float( sum(talkpageOthers) / float(metrics['edits-others']) ) metrics['talkPageRatio'] = sum(talkpageMaintainer) / float( sum(talkpageOthers)) else: metrics['talkPageRatio'] = 0 # if metrics['all-actions'] is 0: # metrics['addsMaintainerAvg'] = 0 # metrics['addsOthersAvg'] = 0 # metrics['addsRatio'] = 0 # metrics['deletesMaintainerRel'] = 0 # metrics['deletesOthersRel'] = 0 # metrics['deletesRatio'] = 0 # metrics['revertsMaintainerRel'] = 0 # metrics['revertsOthersRel'] = 0 # metrics['revertsRatio'] = 0 # metrics['reintroMaintainerAvg'] = 0 # metrics['reintroOthersAvg'] = 0 # metrics['selfreintroMaintainerAvg'] = 0 # metrics['selfreintroOthersAvg'] = 0 # metrics['selfreintroRatio'] = 0 # metrics['antActionsMaintainerAvg'] = 0 # metrics['antActionsOthersAvg'] = 0 # metrics['negActionsRatio'] = 0 # metrics['targetedIntroRatio'] = 0 # metrics['addsMaintainerRel'] = sum(addsMaintainer)/float(metrics['all-actions']) # metrics['addsOthersRel'] = sum(addsOthers)/float(metrics['all-actions']) # metrics['addsRatio'] = metrics['addsMaintainerRel'] / float(metrics['addsOthersRel']) # metrics['deletesMaintainerRel'] = sum(deletesMaintainer)/float(metrics['all-actions']) # metrics['deletesOthersRel'] = sum(deletesOthers)/float(metrics['all-actions']) # metrics['deletesRatio'] = metrics['deletesMaintainerRel'] / float(metrics['deletesOthersRel']) # metrics['revertsMaintainerRel'] = sum(revertsMaintainer)/float(metrics['all-actions']) # metrics['revertsOthersRel'] = sum(revertsOthers)/float(metrics['all-actions']) # metrics['revertsRatio'] = metrics['revertsMaintainerRel'] / float(metrics['revertsOthersRel']) # metrics['reintroMaintainerRel'] = sum(reintroMaintainer)/float(metrics['all-actions']) # metrics['reintroOthersRel'] = sum(reintroOthers)/float(metrics['all-actions']) # metrics['selfreintroMaintainerRel'] = sum(selfreintroMaintainer)/float(metrics['all-actions']) # metrics['selfreintroOthersRel'] = sum(selfreintroOthers)/float(metrics['all-actions']) # if metrics['selfreintroOthersAvg'] == 0: # metrics['selfreintroRatio'] = 0 # else: # metrics['selfreintroRatio'] = metrics['selfreintroMaintainerAvg'] / float(metrics['selfreintroOthersAvg']) # if metrics['antActionsOthersAvg'] == 0: # metrics['antActionsRatio'] = 0 # else: # metrics['antActionsRatio'] = metrics['antActionsMaintainerAvg'] / float(metrics['antActionsOthersAvg']) # if metrics['reintroMaintainerAvg'] == 0 or metrics['selfreintroOthersAvg'] == 0 or metrics['reintroOthersAvg'] == 0: # metrics['targetedIntroRatio'] = 0 # metrics['targetedIntroRatio2Ownership'] = 0 # else: # metrics['targetedIntroRatio'] = (metrics['selfreintroMaintainerAvg'] / float(metrics['reintroMaintainerAvg'])) \ # / float((metrics['selfreintroOthersAvg'] / float(metrics['reintroOthersAvg']))) # #metrics['targetedIntroRatio2Ownership'] = (metrics['selfreintroMaintainerRel'] / float(metrics['reintroMaintainerRel'])) \ # # / float((metrics['selfreintroOthersRel'] / float(metrics['reintroOthersRel']))) metrics['addsMaintainerAvg'] = sum(addsMaintainer) / float( metrics['edits-maintainer']) metrics['addsOthersAvg'] = sum(addsOthers) / float( metrics['edits-others']) metrics['addsRatio'] = metrics['addsMaintainerAvg'] / float( metrics['addsOthersAvg']) metrics['reintroMaintainerAvg'] = sum(reintroMaintainer) / float( metrics['edits-maintainer']) metrics['reintroOthersAvg'] = sum(reintroOthers) / float( metrics['edits-others']) metrics['reintroRatio'] = 
metrics['reintroMaintainerAvg'] / float( metrics['reintroOthersAvg']) metrics['selfreintroMaintainerAvg'] = sum( selfreintroMaintainer) / float(metrics['edits-maintainer']) metrics['selfreintroOthersAvg'] = sum(selfreintroOthers) / float( metrics['edits-others']) metrics[ 'selfreintroRatio'] = metrics['selfreintroMaintainerAvg'] / float( metrics['selfreintroOthersAvg']) metrics['antActionsMaintainerAvg'] = sum(antActionsMaintainer) / float( metrics['edits-maintainer']) metrics['antActionsOthersAvg'] = sum(antActionsOthers) / float( metrics['edits-others']) # metrics['deletesMaintainerAvg'] = sum(deletesMaintainer)/float(metrics['edits-maintainer']) # metrics['deletesOthersAvg'] = sum(deletesOthers)/float(metrics['edits-others']) # metrics['deletesRatio'] = sum(metrics['deletesMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(temp['deletesOthersAvg']) / float(metrics['edits-others'])) # metrics['revertsMaintainerAvg'] = sum(revertsMaintainer)/float(metrics['edits-maintainer']) # metrics['revertsOthersAvg'] = sum(revertsOthers)/float(metrics['edits-others']) # metrics['revertsRatio'] = sum(metrics['revertsMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['revertsOthersAvg']) / float(metrics['edits-others'])) # metrics['revertsMaintainerPot'] = sum(revertsMaintainer)/float(metrics['edits-maintainer']) # metrics['revertsOthersPot'] = sum(revertsOthers)/float(metrics['edits-others']) # metrics['revertsPotRatio'] = sum(metrics['revertsMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['revertsOthersAvg']) / float(metrics['edits-others'])) # metrics['reintroMaintainerAvg'] = sum(reintroMaintainer)/float(metrics['edits-maintainer']) # metrics['reintroOthersAvg'] = sum(reintroOthers)/float(metrics['edits-others']) # metrics['reintroRatio'] = sum(metrics['reintroMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['reintroOthersAvg']) / float(metrics['edits-others'])) # metrics['selfreintroMaintainerAvg'] = sum(selfreintroMaintainer)/float(metrics['edits-maintainer']) # metrics['selfreintroOthersAvg'] = sum(selfreintroOthers)/float(metrics['edits-others']) # metrics['selfreintroRatio'] = sum(metrics['selfreintroMaintainerAvg']) / float(metrics['edits-maintainer']) / float(sum(metrics['selfreintroOthersAvg']) / float(metrics['edits-others'])) # share of selfreintroductions of potential own tokens # temp['selfreintroMaintainerPot'] = [(b/float(a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], selfreintroMaintainer[1:len(selfreintroMaintainer)-1])] # temp['selfreintroOthersPot'] = [(b/float(c-a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], selfreintroOthers[1:len(selfreintroOthers)-1], number_tokens[:len(number_tokens)-2) if a is not 0] # metrics['selfreintroPotRatio'] = sum(temp['selfreintroMaintainerPot']) / float(metrics['edits-maintainer']) / float(sum(temp['selfreintroOthersPot']) / float(metrics['edits-others'])) # temp['antActionsMaintainerPot'] = [(b/float(a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], antActionsMaintainer[1:len(antActionsMaintainer)-1]) if a is not 0] # temp['antActionsOthersPot'] = [(b/float(c-a)) for a,b in zip(ownershipMaintainerAbs[:len(ownershipMaintainerAbs)-2], antActionsOthers[1:len(antActionsOthers)-1], number_tokens[:len(number_tokens)-2]) if a is not 0] # metrics['antActionsRatio'] = sum(temp['antActionsMaintainerPot']) / float(metrics['edits-maintainer']) / float(sum(temp['antActionsOthersPot']) / 
float(metrics['edits-others'])) data[title] = metrics data = DataFrame(data) data = data.transpose() save2CSV(data, filename) return data
pp.ylabel('incidents') pp.tight_layout(pad=3.0) pp.show() groups = series['2014':'2019'].groupby(Grouper(freq='A')) years = DataFrame() for name, group in groups: years[name.year] = group.values # Box and Whisker Plots pp.figure(figsize=(6, 4), dpi=100, edgecolor='k') years.boxplot() pp.title('Trend') pp.tight_layout(pad=3.0) pp.show() years = years.transpose() pp.figure(figsize=(6, 4), dpi=100, edgecolor='k') years.boxplot() pp.tight_layout(pad=3.0) pp.xticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ]) pp.title('Season') pp.show() # isolate the last year (12 months) in a separate data/test set split_point = len(series) - 12 dataset, validation = series[0:split_point], series[split_point:] print('Dataset %d, Validation %d' % (len(dataset), len(validation)))
data = { 'name': ['박대성', '백상훈', '이권수', '이우현'], 'money': [5000, 2000000, 5000000, 80000000], 'sex': ['M', 'M', 'M', 'M'] } print(data) print('--------- dict --> DataFrame conversion ---') df = DataFrame(data) print(df) print(type(df)) print('------------------') df = DataFrame(data, columns=['money', 'sex', 'name']) print(df) print(type(df)) print('--------- iloc / loc ------') print(df.iloc[0:2]) print('-------------------------------------------') print(df.loc[0]) print('--- add a column -----') df['level'] = df['money'] > 5000 print(df) print('--- extract data -----') print(df[df.name == '박대성']) print(df[(df.money > 10000) & (df.sex == 'M')]) print('---- swap rows and columns ------') print(df.transpose())
def test_frame_from_json_to_json(self): def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_dtype=True, raise_ok=None, sort=None, check_index_type=True, check_column_type=True, check_numpy_dtype=False): if sort is not None: df = df.sort_values(sort) else: df = df.sort_index() # if we are not unique, then check that we are raising ValueError # for the appropriate orients if not df.index.is_unique and orient in ['index', 'columns']: pytest.raises( ValueError, lambda: df.to_json(orient=orient)) return if (not df.columns.is_unique and orient in ['index', 'columns', 'records']): pytest.raises( ValueError, lambda: df.to_json(orient=orient)) return dfjson = df.to_json(orient=orient) try: unser = read_json(dfjson, orient=orient, dtype=dtype, numpy=numpy, convert_axes=convert_axes) except Exception as detail: if raise_ok is not None: if isinstance(detail, raise_ok): return raise if sort is not None and sort in unser.columns: unser = unser.sort_values(sort) else: unser = unser.sort_index() if dtype is False: check_dtype = False if not convert_axes and df.index.dtype.type == np.datetime64: unser.index = DatetimeIndex( unser.index.values.astype('i8') * 1e6) if orient == "records": # index is not captured in this orientation tm.assert_almost_equal(df.values, unser.values, check_dtype=check_numpy_dtype) tm.assert_index_equal(df.columns, unser.columns, exact=check_column_type) elif orient == "values": # index and cols are not captured in this orientation if numpy is True and df.shape == (0, 0): assert unser.shape[0] == 0 else: tm.assert_almost_equal(df.values, unser.values, check_dtype=check_numpy_dtype) elif orient == "split": # index and col labels might not be strings unser.index = [str(i) for i in unser.index] unser.columns = [str(i) for i in unser.columns] if sort is None: unser = unser.sort_index() tm.assert_almost_equal(df.values, unser.values, check_dtype=check_numpy_dtype) else: if convert_axes: tm.assert_frame_equal(df, unser, check_dtype=check_dtype, check_index_type=check_index_type, check_column_type=check_column_type) else: tm.assert_frame_equal(df, unser, check_less_precise=False, check_dtype=check_dtype) def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, sort=None, check_index_type=True, check_column_type=True): # numpy=False if convert_axes: _check_orient(df, "columns", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "records", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "split", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "index", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "values", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "split", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "index", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "values", dtype=dtype, convert_axes=False, sort=sort) # numpy=True and raise_ok might be not None, so ignore the error if convert_axes: _check_orient(df, "columns", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "records", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) 
_check_orient(df, "split", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "index", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "values", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "columns", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "records", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "split", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "index", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "values", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) # basic _check_all_orients(self.frame) assert self.frame.to_json() == self.frame.to_json(orient="columns") _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) _check_all_orients(self.intframe, dtype=False) # big one # index and columns are strings as all unserialised JSON object keys # are assumed to be strings biggie = DataFrame(np.zeros((200, 4)), columns=[str(i) for i in range(4)], index=[str(i) for i in range(200)]) _check_all_orients(biggie, dtype=False, convert_axes=False) # dtypes _check_all_orients(DataFrame(biggie, dtype=np.float64), dtype=np.float64, convert_axes=False) _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int, convert_axes=False) _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3', convert_axes=False, raise_ok=ValueError) # categorical _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError) # empty _check_all_orients(self.empty_frame, check_index_type=False, check_column_type=False) # time series data _check_all_orients(self.tsframe) # mixed data index = pd.Index(['a', 'b', 'c', 'd', 'e']) data = {'A': [0., 1., 2., 3., 4.], 'B': [0., 1., 0., 1., 0.], 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], 'D': [True, False, True, False, True]} df = DataFrame(data=data, index=index) _check_orient(df, "split", check_dtype=False) _check_orient(df, "records", check_dtype=False) _check_orient(df, "values", check_dtype=False) _check_orient(df, "columns", check_dtype=False) # index oriented is problematic as it is read back in in a transposed # state, so the columns are interpreted as having mixed data and # given object dtypes. # force everything to have object dtype beforehand _check_orient(df.transpose().transpose(), "index", dtype=False)
def __init__(self, workbench, data_path = "/home/moritz/people/MoreData/genomes/TOBG/", clean = False): Database.__init__(self,workbench = workbench, data_path = data_path) wb = load_workbook("metadata/Table3_GenomeStats.xlsx") t_metadata = DataFrame([l for i,l in enumerate(wb['Sheet1'].values) if i >1], columns=[l for l in wb['Sheet1'].values][1]) corrected = { u'\xc2Gemmatimonadetes': 'Gemmatimonadetes' , 'marinegroup': 'Puniceicoccaceae', 'Urania1B19': 'Phycisphaerae', 'Thalassopira' : 'Thalassospira', 'SM1A02': 'Phycisphaerae', 'SAR324cluster': 'SAR324 cluster', 'unclassifiedAlphaproteobacteria': 'Alphaproteobacteria', 'SAR202-2': 'SAR202 cluster', 'SAR202-1': 'SAR202 cluster', 'SAR116cluster' : 'SAR116 cluster', 'OPB35soil': 'unidentified Verrucomicrobium group OPB35', 'Pla3': 'Planctomycetes', 'OM190': 'Planctomycetes', 'NovelClass_B': 'Ignavibacteriae', 'Nitropelagicus': 'Candidatus Nitrosopelagicus' , 'Nanoarchaoeta': 'Nanoarchaeota', 'Methylobacterum': 'Methylobacterium', 'JL-ENTP-F27': 'Phycisphaerae', 'FS140-16B-02marinegroup': 'Phycisphaerae', 'Epsilonbacteraeota': 'Bacteria', 'DEV007': 'Verrucomicrobiales', 'CandidatusPuniceispirillum': 'Candidatus Puniceispirillum', 'CandidatePhylaRadiation' : 'Bacteria candidate phyla', 'CaThioglobus': 'Candidatus Thioglobus', 'CaAtelocyanobacterium' : 'Candidatus Atelocyanobacterium', '0319-6G20': 'Bdellovibrionales', 'Euryarcheota' : 'Euryarchaeota' , 'SBR1093' : 'Bacteria', 'Euryarcheoata' : 'Euryarchaeota' } regions = { 'NP' : 'North_Pacific', 'NAT' : 'North_Atlantic', 'MED' : 'Mediterranean', 'ARS' : 'Arabian_Sea', 'RS' : 'Red_Sea', 'IN' : 'Indian_Ocean', 'EAC' : 'East_Africa_Coastal', 'SAT' : 'South_Atlantic', 'CPC' : 'Chile_Peru_Coastal', 'SP' : 'South_Pacific' } wb2 = load_workbook("metadata/Table4_Phylogeny.xlsx") taxos = { l[0] : [v for v in l[:-1] if v != 'null' and not v[0:4] == "nove" ][-1] for l in wb2.get_sheet_by_name('Hug set').values} taxos = {k : corrected[v] if corrected.has_key(v) else v for k, v in taxos.items()} tax_2_id = self.taxDb.get_name_translator(taxos.values()) tax_ids = {g : tax_2_id.get(taxos[g])[0] for g in t_metadata['Genome ID'] if taxos.has_key(g) } t_metadata['species_taxid'] = [ tax_ids[g] if tax_ids.has_key(g) else 131567 for g in t_metadata['Genome ID']] t_metadata.index = Index(t_metadata['Genome ID']) t_metadata['region'] = [regions[g.split("_")[1].split("-")[0]] for g in t_metadata['Genome ID']] self.metadata = t_metadata.transpose().to_dict() print "Loading genomes" if os.path.exists(pjoin(self.data_path , 'TOBGGENOMES.tar.gz')): os.system("tar xzvf " + pjoin(self.data_path , 'TOBGGENOMES.tar.gz')) os.remove(pjoin(self.data_path , 'TOBGGENOMES.tar.gz')) for k,v in tqdm(self.metadata.items()): genome_path = pjoin(self.data_path, v['region'], k) genome_file = pjoin(genome_path, k + ".fna") if not os.path.exists(genome_file): os.makedirs(pjoin(genome_path, 'original_files')) shutil.move(self.data_path + k + ".fna", pjoin(genome_path, 'original_files')) self.genomes += [Genome(k, genome_path, ref=pjoin(genome_path, 'original_files', k + ".fna"), manual_metadata = v, taxDb = self.taxDb, workbench = self.workbench)]
def test_frame_from_json_to_json(self): def _check_orient( df, orient, dtype=None, numpy=False, convert_axes=True, check_dtype=True, raise_ok=None, sort=None, check_index_type=True, check_column_type=True, ): if sort is not None: df = df.sort_values(sort) else: df = df.sort_index() # if we are not unique, then check that we are raising ValueError # for the appropriate orients if not df.index.is_unique and orient in ["index", "columns"]: self.assertRaises(ValueError, lambda: df.to_json(orient=orient)) return if not df.columns.is_unique and orient in ["index", "columns", "records"]: self.assertRaises(ValueError, lambda: df.to_json(orient=orient)) return dfjson = df.to_json(orient=orient) try: unser = read_json(dfjson, orient=orient, dtype=dtype, numpy=numpy, convert_axes=convert_axes) except Exception as detail: if raise_ok is not None: if isinstance(detail, raise_ok): return raise if sort is not None and sort in unser.columns: unser = unser.sort_values(sort) else: unser = unser.sort_index() if dtype is False: check_dtype = False if not convert_axes and df.index.dtype.type == np.datetime64: unser.index = DatetimeIndex(unser.index.values.astype("i8") * 1e6) if orient == "records": # index is not captured in this orientation assert_almost_equal(df.values, unser.values) self.assertTrue(df.columns.equals(unser.columns)) elif orient == "values": # index and cols are not captured in this orientation if numpy is True and df.shape == (0, 0): assert unser.shape[0] == 0 else: assert_almost_equal(df.values, unser.values) elif orient == "split": # index and col labels might not be strings unser.index = [str(i) for i in unser.index] unser.columns = [str(i) for i in unser.columns] if sort is None: unser = unser.sort_index() assert_almost_equal(df.values, unser.values) else: if convert_axes: assert_frame_equal( df, unser, check_dtype=check_dtype, check_index_type=check_index_type, check_column_type=check_column_type, ) else: assert_frame_equal(df, unser, check_less_precise=False, check_dtype=check_dtype) def _check_all_orients( df, dtype=None, convert_axes=True, raise_ok=None, sort=None, check_index_type=True, check_column_type=True ): # numpy=False if convert_axes: _check_orient(df, "columns", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "records", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "split", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "index", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "values", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "split", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "index", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "values", dtype=dtype, convert_axes=False, sort=sort) # numpy=True and raise_ok might be not None, so ignore the error if convert_axes: _check_orient( df, "columns", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False, ) _check_orient( df, "records", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False, ) _check_orient( df, "split", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False, ) 
_check_orient( df, "index", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False, ) _check_orient( df, "values", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False, ) _check_orient(df, "columns", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "records", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "split", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "index", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "values", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) # basic _check_all_orients(self.frame) self.assertEqual(self.frame.to_json(), self.frame.to_json(orient="columns")) _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) _check_all_orients(self.intframe, dtype=False) # big one # index and columns are strings as all unserialised JSON object keys # are assumed to be strings biggie = DataFrame(np.zeros((200, 4)), columns=[str(i) for i in range(4)], index=[str(i) for i in range(200)]) _check_all_orients(biggie, dtype=False, convert_axes=False) # dtypes _check_all_orients(DataFrame(biggie, dtype=np.float64), dtype=np.float64, convert_axes=False) _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int, convert_axes=False) _check_all_orients(DataFrame(biggie, dtype="U3"), dtype="U3", convert_axes=False, raise_ok=ValueError) # categorical _check_all_orients(self.categorical, sort="sort", raise_ok=ValueError) # empty _check_all_orients(self.empty_frame, check_index_type=False, check_column_type=False) # time series data _check_all_orients(self.tsframe) # mixed data index = pd.Index(["a", "b", "c", "d", "e"]) data = { "A": [0.0, 1.0, 2.0, 3.0, 4.0], "B": [0.0, 1.0, 0.0, 1.0, 0.0], "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], "D": [True, False, True, False, True], } df = DataFrame(data=data, index=index) _check_orient(df, "split", check_dtype=False) _check_orient(df, "records", check_dtype=False) _check_orient(df, "values", check_dtype=False) _check_orient(df, "columns", check_dtype=False) # index oriented is problematic as it is read back in in a transposed # state, so the columns are interpreted as having mixed data and # given object dtypes. # force everything to have object dtype beforehand _check_orient(df.transpose().transpose(), "index", dtype=False)
label=None) # Plot the true curve linex = np.linspace(0, 1, 101) liney = np.sin(2 * np.pi * linex) subplot.plot(linex, liney, color='green', linestyle='--') # Plot the polynomial approximation linex = np.linspace(0, 1, 101) liney = f(linex) label = "E(RMS)=%.2f" % rms_error(train_set, f) subplot.plot(linex, liney, color='red', label=label) subplot.legend(loc=1) # Show the coefficient values print("Table of the coefficients") print(df_ws.transpose()) fig.show() # Plot how the error changes on the training and test sets df = DataFrame(columns=['Training set', 'Test set']) for m in range(0, 10): # degree of the polynomial f, ws = resolve(train_set, m) train_error = rms_error(train_set, f) test_error = rms_error(test_set, f) df = df.append(Series([train_error, test_error], index=['Training set', 'Test set']), ignore_index=True) df.plot(title='RMS Error', style=['-', '--'], grid=True, ylim=(0, 0.9)) plt.show()
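# The snippet above relies on helpers resolve() and rms_error() defined
# elsewhere; a plausible sketch of rms_error under the usual root-mean-square
# definition E_RMS = sqrt(2 * E(w) / N) (an assumption, not the source's code),
# taking the dataset to be a DataFrame with columns x and y:
import numpy as np

def rms_error(dataset, f):
    # residuals of the fitted polynomial f over the dataset
    err = f(dataset.x) - dataset.y
    # equals sqrt(2 * (0.5 * sum(err^2)) / N), the RMS of the residuals
    return np.sqrt(np.mean(err ** 2))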
def test_frame_from_json_to_json(self): def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_dtype=True, raise_ok=None, sort=None, check_index_type=True, check_column_type=True, check_numpy_dtype=False): if sort is not None: df = df.sort_values(sort) else: df = df.sort_index() # if we are not unique, then check that we are raising ValueError # for the appropriate orients if not df.index.is_unique and orient in ['index', 'columns']: self.assertRaises( ValueError, lambda: df.to_json(orient=orient)) return if (not df.columns.is_unique and orient in ['index', 'columns', 'records']): self.assertRaises( ValueError, lambda: df.to_json(orient=orient)) return dfjson = df.to_json(orient=orient) try: unser = read_json(dfjson, orient=orient, dtype=dtype, numpy=numpy, convert_axes=convert_axes) except Exception as detail: if raise_ok is not None: if isinstance(detail, raise_ok): return raise if sort is not None and sort in unser.columns: unser = unser.sort_values(sort) else: unser = unser.sort_index() if dtype is False: check_dtype = False if not convert_axes and df.index.dtype.type == np.datetime64: unser.index = DatetimeIndex( unser.index.values.astype('i8') * 1e6) if orient == "records": # index is not captured in this orientation assert_almost_equal(df.values, unser.values, check_dtype=check_numpy_dtype) self.assert_index_equal(df.columns, unser.columns, exact=check_column_type) elif orient == "values": # index and cols are not captured in this orientation if numpy is True and df.shape == (0, 0): assert unser.shape[0] == 0 else: assert_almost_equal(df.values, unser.values, check_dtype=check_numpy_dtype) elif orient == "split": # index and col labels might not be strings unser.index = [str(i) for i in unser.index] unser.columns = [str(i) for i in unser.columns] if sort is None: unser = unser.sort_index() assert_almost_equal(df.values, unser.values, check_dtype=check_numpy_dtype) else: if convert_axes: assert_frame_equal(df, unser, check_dtype=check_dtype, check_index_type=check_index_type, check_column_type=check_column_type) else: assert_frame_equal(df, unser, check_less_precise=False, check_dtype=check_dtype) def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, sort=None, check_index_type=True, check_column_type=True): # numpy=False if convert_axes: _check_orient(df, "columns", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "records", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "split", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "index", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "values", dtype=dtype, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "split", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "index", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "values", dtype=dtype, convert_axes=False, sort=sort) # numpy=True and raise_ok might be not None, so ignore the error if convert_axes: _check_orient(df, "columns", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "records", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) 
_check_orient(df, "split", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "index", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "values", dtype=dtype, numpy=True, raise_ok=raise_ok, sort=sort, check_index_type=False, check_column_type=False) _check_orient(df, "columns", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "records", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "split", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "index", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) _check_orient(df, "values", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) # basic _check_all_orients(self.frame) self.assertEqual(self.frame.to_json(), self.frame.to_json(orient="columns")) _check_all_orients(self.intframe, dtype=self.intframe.values.dtype) _check_all_orients(self.intframe, dtype=False) # big one # index and columns are strings as all unserialised JSON object keys # are assumed to be strings biggie = DataFrame(np.zeros((200, 4)), columns=[str(i) for i in range(4)], index=[str(i) for i in range(200)]) _check_all_orients(biggie, dtype=False, convert_axes=False) # dtypes _check_all_orients(DataFrame(biggie, dtype=np.float64), dtype=np.float64, convert_axes=False) _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int, convert_axes=False) _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3', convert_axes=False, raise_ok=ValueError) # categorical _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError) # empty _check_all_orients(self.empty_frame, check_index_type=False, check_column_type=False) # time series data _check_all_orients(self.tsframe) # mixed data index = pd.Index(['a', 'b', 'c', 'd', 'e']) data = {'A': [0., 1., 2., 3., 4.], 'B': [0., 1., 0., 1., 0.], 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], 'D': [True, False, True, False, True]} df = DataFrame(data=data, index=index) _check_orient(df, "split", check_dtype=False) _check_orient(df, "records", check_dtype=False) _check_orient(df, "values", check_dtype=False) _check_orient(df, "columns", check_dtype=False) # index oriented is problematic as it is read back in in a transposed # state, so the columns are interpreted as having mixed data and # given object dtypes. # force everything to have object dtype beforehand _check_orient(df.transpose().transpose(), "index", dtype=False)
def fig_graph( data: DataFrame, title: str, percentile_cutoff: float = 0.5, percentile_bins: int = 100, ) -> Figure: data = data.copy() assert 0.0 <= percentile_cutoff <= 1.0 assert percentile_bins > 0 assert list(data.index) == list(data.columns) assert np.allclose(data.values, data.transpose().values, equal_nan=True) diag = data.values.diagonal().copy() np.fill_diagonal(data.values, np.nan) print(f"Creating figure for dataframe of shape {data.shape}") drop_labels = data[ data.sum().rank(method="first", pct=True) <= percentile_cutoff].index print(f"\t{percentile_cutoff=}, dropping {len(drop_labels)} entries") data.drop(drop_labels, axis=0, inplace=True) data.drop(drop_labels, axis=1, inplace=True) G = ig.Graph.Adjacency( np.where( np.logical_and(np.isfinite(data.values), np.greater(data.values, 0)), True, False, ).tolist(), mode=ig.ADJ_UNDIRECTED, ) # clustering = G.community_fastgreedy().as_clustering() # G = clustering.graph # clusters = clustering.membership # layout = G.layout('sphere', dim=3) layout = G.layout_auto(dim=3) x_n, y_n, z_n = list(zip(*layout)) min_size, max_size = 5, 10 # in px if data.shape[0] > 1: sizes = diag sizes /= sizes.max() # in [0, 1] sizes = (min_size + (max_size - min_size) * sizes ) # in [min_size, max_size] else: sizes = [max_size for _ in range(data.shape[0])] weights = list( data.fillna(0).values[e.source][e.target].item() for e in G.es) if len(weights) > 0: centralities = np.array(G.betweenness(weights=weights)) if centralities.min() != centralities.max(): centralities = (centralities - centralities.min()) / ( centralities.max() - centralities.min()) else: centralities = [] node_trace = go.Scatter3d( x=x_n, y=y_n, z=z_n, mode="markers+text", marker=dict( size=sizes, sizemode="diameter", color=centralities, opacity=1, colorscale="Jet", colorbar=dict(title="vertex betweenness", thickness=5), ), hovertext=data.columns, hoverinfo="text", hoverlabel=dict(bgcolor="white"), showlegend=False, ) print(f"\tcreated scatter plot with {len(x_n)} nodes") percentiles = (data.unstack().rank(method="first", pct=True).values.reshape(data.shape)) x_e = {i: [] for i in range(percentile_bins)} y_e = {i: [] for i in range(percentile_bins)} z_e = {i: [] for i in range(percentile_bins)} min_width, max_width = min_size / 2, max_size / 2 # in px linspace_width = np.linspace(min_width, max_width, num=percentile_bins) w_e = dict(enumerate(linspace_width)) min_alpha, max_alpha = 0.01, 0.2 linspace_alpha = np.linspace(min_alpha, max_alpha, num=percentile_bins) a_e = dict(enumerate(linspace_alpha)) quantiles = [] for e in G.es: s = e.source t = e.target quantile = min(percentile_bins - 1, int(percentile_bins * percentiles[s][t])) quantiles.append(quantile) x_e[quantile] += [layout[s][0], layout[t][0], None] y_e[quantile] += [layout[s][1], layout[t][1], None] z_e[quantile] += [layout[s][2], layout[t][2], None] edge_traces = [ go.Scatter3d( x=x_e[i], y=y_e[i], z=z_e[i], mode="lines", line=dict(width=w_e[i], color=f"rgba(0,0,0,{a_e[i]})"), showlegend=False, hoverinfo="none", ) for i in range(percentile_bins) ] print( f"\tcreated scatter plot with {len(G.es)} edges across {percentile_bins} bins" ) layout = go.Layout( title=title, scene=dict( xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False), ), hovermode="closest", ) fig = go.Figure(data=edge_traces + [node_trace], layout=layout) return fig
#Feature ranking with recursive feature elimination and cross-validated selection of the best number of features #use linear regression as the model lin_reg = LinearRegression() #This is to select 8 variables: can be changed and checked in model for accuracy mod = RFECV(lin_reg, step=1, cv=20) #RFE(lin_reg, 4, step=1) mod_fit = mod.fit(X, y) #to fit #The feature ranking, such that ranking_[i] corresponds to the ranking position of the i-th feature. Selected (i.e., estimated best) features are assigned rank 1. rankings = DataFrame(mod_fit.ranking_, index=names) #Make it into a data frame rankings.rename(columns={0: 'Rank'}, inplace=True) print(rankings.transpose()) #show the rankings; a bare transpose() call is a no-op outside a notebook # Selecting features to be involved in training and predictions columns = ['others_cr', 'trend_macd', 'trend_ichimoku_a', 'trend_ichimoku_conv', 'trend_ichimoku_base', 'close'] df = btc[columns] train = df.astype(float) print('Shape of training set == {}.'.format(train.shape)); print(); print(df.tail()); print(); print('Observations: %d' % (len(df))); fig, ax = plt.subplots(figsize=(10, 6)) values = df.close
def _measure_cos_sim(columns_set: pd.DataFrame, rows_set: pd.DataFrame): similarity_matrix = rows_set.dot(columns_set.transpose()) return similarity_matrix
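# Note on _measure_cos_sim() above: the dot product equals cosine similarity
# only if both frames' rows are L2-normalized first; a minimal sketch with
# made-up data (the normalization helper is an assumption, not source code):
import numpy as np
import pandas as pd

def _l2_normalize(df: pd.DataFrame) -> pd.DataFrame:
    # divide each row by its Euclidean norm so row dot products become cosines
    return df.div(np.sqrt((df ** 2).sum(axis=1)), axis=0)

rows = _l2_normalize(pd.DataFrame(np.random.rand(3, 4)))
cols = _l2_normalize(pd.DataFrame(np.random.rand(2, 4)))
sims = _measure_cos_sim(cols, rows)   # 3x2 frame of cosine similarities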
class InsertAndTable(QWidget): def __init__( self, Tablename: str, FeildForm: dict, database: QSqlDatabase, InsertQuery: str, parent=None, ): super().__init__(parent=parent) self.Tablename = Tablename self.database = database self.FeildForm = FeildForm self.setInsertAndLayout() self.InsertQuery = InsertQuery def setInsertAndLayout(self): layout = QVBoxLayout(self) layout1 = QHBoxLayout() layout2 = QVBoxLayout() button = QPushButton("Input Data", self) button2 = QPushButton("Delete") button3 = QPushButton("Printer!") button.clicked.connect(self.InsertShow) button2.clicked.connect(self.DeleteRow) button3.clicked.connect(self.Print) self.table = Table("projects.db", self.Tablename, self.database, self) layout1.addWidget(button) layout1.addWidget(button2) layout1.addWidget(button3) layout2.addWidget(self.table) layout.addLayout(layout1) layout.addLayout(layout2) def InsertShow(self): FormButton = FormDialog(self.Tablename, self.FeildForm, self) result = FormButton.exec_() if result == True: if self.InsertQuery != "": ExecQuery = self.InsertQuery.format( *FormButton.GetAllFeildResponses()) result = self.database.exec_(ExecQuery) self.table.refresh() def DeleteRow(self): self.table.model.removeRow(self.table.currentIndex().row()) self.table.refresh() def Print(self): Page = QPrintDialog(self) dec = Page.exec_() ###Code added for creating the excel file if dec == 1: model = self.table.model data = [] for row in range(model.rowCount()): data.append([]) for column in range(model.columnCount()): index = model.index(row, column) data[row].append(str(model.data(index))) # for formatting excels dp = dict(enumerate(string.ascii_uppercase, 1)) xlsFilepath = "./testing1.xlsx" ### creating dataframes adding columns to the dataframe self.df = DataFrame(data) self.attr = list(self.FeildForm.keys()) self.df.columns = self.attr self.no_of_attr = len(self.df.columns) # greater than or equal to 7 no_of_attr if self.no_of_attr >= 7: self.df = self.df.transpose() self.df.to_excel("testing1.xlsx") wb = openpyxl.load_workbook("testing1.xlsx") sheet = wb.active column_len = len(max(self.df.index)) sheet.column_dimensions[dp[1]].width = column_len + 5 for x, y in enumerate(self.df.columns): column_len = self.df[x].astype(str).str.len().max() print(dp[x + 2]) sheet.column_dimensions[dp[x + 2]].width = column_len + 5 wb.save(xlsFilepath) # for no_of_attributes <7 else: self.df.to_excel("testing1.xlsx") wb = openpyxl.load_workbook("testing1.xlsx") sheet = wb.active for x, y in enumerate(self.df.columns): column_len = len(max(self.df[self.df.columns[x]])) column_attr_len = len(self.df.columns[x]) print("cl=", column_len, " attr =", column_attr_len) column_len = (column_len if column_len >= column_attr_len else column_attr_len) sheet.column_dimensions[dp[x + 2]].width = column_len + 5 print(dp[x + 1], column_len + 5) sheet.column_dimensions[dp[x + 2]].width = column_len + 5 print(dp[x + 2], column_len + 5) wb.save(xlsFilepath)
def pseudobulk(adata, outpath=None, column='celltype0', label='celltype0', split_condition='donor', todrop=[ 'CELL', 'input.path', 'percent_mito', 'n_counts', 'n_genes', 'leiden', 'celltype0', 'celltype1', 'celltype2', 'celltype3', 'dblabel' ], main_condition='CONDITION'): """export pseudobulk profiles of cells to .gct files This is a function with which any type of labeling (i.e. celltype annotation, louvain clustering, etc.) can be written out to several .gct files as well as a single metadata file. To ensure FAIR compatibility, label and file name should not be changed. parameters ---------- adata: `AnnData` the AnnData object containing the labeling outpath `str` | default = current working directory filepath to the directory in which the results should be output; if no directory is specified, the results are written to the current working directory. column: `str` | default = 'celltype0' Name of the column in adata.obs that is to be mapped to cell barcodes and written out to file. label: `str` | default = 'celltype0' label above the column when it is written out to several files split_condition: `str` | default = 'donor' the experimental unit, e.g. sample ID todrop: `list` Several column headers to be excluded from metadata main_condition: `str` | default = 'CONDITION' main condition to be output in the metadata file returns ------- dfmerge: `pd.DataFrame` merged dataframe """ if outpath is None: outpath = os.getcwd() data = adata.obs.get(column) if data is None: sys.exit('please specify a column name that is present in adata.obs') data = adata.obs.get(column).to_frame(name=label) data = adata.obs.get(main_condition) if data is None: sys.exit( 'please specify a condition name that is present in adata.obs') ### check if the outdir exists, and create it if not if not os.path.exists(outpath): os.makedirs(outpath) ### create adata subsets for each column value adata.obs[split_condition] = adata.obs[split_condition].astype('str') adata.obs[split_condition] = adata.obs[split_condition].astype('category') adata.obs[column] = adata.obs[column].astype('category') bulks = {} myset = list(set(adata.obs[column])) for i in myset: ii = i.replace(" ", "_") ## to avoid spaces in cell names bulks[ii] = adata[adata.obs[column].isin([i])].copy() bulks['all'] = adata.copy() ### go through each adata subset and export pseudobulk dfbulks = {} for x in bulks.keys(): # sum expression auxdata = bulks[x].copy() myexp = list(auxdata.obs[split_condition].cat.categories ) ### these are all different levels for experiments mysums = zeros((len(auxdata.raw.var.index), len(myexp))) for i in range(len(myexp)): mysums[:, i] = expm1( auxdata[auxdata.obs[split_condition] == myexp[i]].raw.X).sum( axis=0) mysums = DataFrame(mysums) mysums.index = adata.raw.var.index mysums.columns = [x + '.'
+ y for y in myexp] dfbulks[x] = mysums mydat = auxdata.raw.var.loc[:, ['SYMBOL', 'ENSEMBL']] mydat.rename(columns={'SYMBOL': 'Description'}, inplace=True) gct = mydat.merge(dfbulks[x], how='right', left_index=True, right_index=True) gct.set_index('ENSEMBL', inplace=True) gct.index.names = ['NAME'] gct.columns = ['Description'] + myexp #write out average expression gctFile_pseudo = outpath + 'Pseudobulk-' + label + '-' + x + '.gct' with open(gctFile_pseudo, "w") as fp: fp.write("#1.2" + "\n") fp.write(str(gct.shape[0]) + '\t' + str(gct.shape[1] - 1) + '\n') # "description" already merged in as a column fp.close() #...and then the matrix gct.to_csv(gctFile_pseudo, sep='\t', index=True, index_label='NAME', header=True, mode='a', float_format='%.3f') print('Pseudobulk-' + label + '-' + x + '.gct exported successfully to file') #### Output into single .tsv file dfmerge = concat(dfbulks, axis=1) dfmerge.columns = dfmerge.columns.droplevel() dfmerge.to_csv(outpath + 'Pseudobulk-' + label + '.tsv', sep='\t', index_label=False) ### Export one metadata file myexp = list(adata.obs[split_condition].cat.categories) colindex = range(0, len(adata.obs.columns) ) ### replace if only a subset of metadata should be used mysums = [] for i in range(len(myexp)): mysums.append( list(adata[adata.obs[split_condition] == myexp[i]].obs.iloc[:, colindex].iloc[0, :])) mysums = DataFrame(mysums).transpose() mysums.index = adata[adata.obs[split_condition] == myexp[i]].obs.iloc[:, colindex].columns mysums.columns = myexp mysums = mysums.transpose().drop(labels=todrop, axis=1, errors='ignore') mysums['ID'] = list(mysums.index) colorder = ['ID', main_condition] + (mysums.columns.drop( ['ID', main_condition]).tolist()) mysums.loc[:, colorder].to_csv(outpath + 'Pseudobulk.meta', sep='\t', index=False) return (dfmerge) sys.exit(0)
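# A hypothetical usage sketch: "annotated.h5ad" is an illustrative file and
# the obs columns 'celltype0', 'donor' and 'CONDITION' are assumed to exist,
# as is a populated adata.raw; scanpy is only used here to load the object.
import scanpy as sc

adata = sc.read_h5ad("annotated.h5ad")
merged = pseudobulk(adata,
                    outpath="./pseudobulk_out",
                    column='celltype0',
                    label='celltype0',
                    split_condition='donor',
                    main_condition='CONDITION')
# genes x (label.split_condition) matrix of summed, un-logged expression
print(merged.shape)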
from pandas import DataFrame, Series


def cpu_perf(systems, unique_id, group_number, detail_options,
             rampup_value=0, current_dir=""):
    # search_item, print_perf, prepare_detail, print_detail and
    # print_summary are helper functions defined elsewhere in this module.
    have_cpu_data = False
    host_cpu_list = search_item(systems, unique_id, "cpu", "(.*)", [],
                                ['product'])
    host_cpu_number = search_item(systems, unique_id, "cpu",
                                  "(.*logical.*)", [], ['number'])
    core_counts = 1
    for host in host_cpu_number:
        for item in host_cpu_number[host]:
            core_counts = item[3]
            break
    cpu_type = ''
    for host in host_cpu_list:
        for item in host_cpu_list[host]:
            cpu_type = item[3]
            break
    modes = ['bogomips', 'loops_per_sec']
    sets = search_item(systems, unique_id, "cpu", "(.*)", [], modes)
    global_perf = dict()
    for mode in sorted(modes):
        results = {}
        for system in sets:
            cpu = []
            series = []
            found_data = False
            for perf in sets[system]:
                if perf[2] == mode:
                    # We shall split individual cpu benchmarking from
                    # the global one
                    if "_" in perf[1]:
                        if not perf[1] in cpu:
                            cpu.append(perf[1])
                        series.append(float(perf[3]))
                        found_data = True
                    elif "loops_per_sec" in mode:
                        global_perf[system] = float(perf[3])
                        found_data = True
            if found_data is True:
                # If no series are populated, it means that a single
                # "All CPU" run was done
                # If so, let's create a single run value
                if not series:
                    series.append(global_perf[system])
                    cpu.append("logical")
                results[system] = Series(series, index=cpu)

        # No need to continue if no CPU data in this benchmark
        if not results:
            continue

        df = DataFrame(results)
        consistent = []
        curious = []
        unstable = []
        details = []
        matched_category = []
        for cpu in df.transpose().columns:
            if have_cpu_data is False:
                print()
                print("Group %d : Checking CPU perf" % group_number)
                have_cpu_data = True
            print_perf(2, 7, df.transpose()[cpu], df, mode, cpu, consistent,
                       curious, unstable, "", rampup_value, current_dir)
            prepare_detail(detail_options, group_number, mode, cpu, details,
                           matched_category)

        print_detail(detail_options, details, df, matched_category)
        print_summary(mode, consistent, "consistent", "", df, cpu_type)
        print_summary(mode, curious, "curious", "", df)
        print_summary(mode, unstable, "unstable", "", df)

        if mode == "loops_per_sec":
            efficiency = {}
            mode_text = 'CPU Effi.'
            consistent = []
            curious = []
            unstable = []
            details = []
            matched_category = []
            for system in sets:
                host_efficiency_full_load = []
                host_perf = (df[system].sum() *
                             (int(core_counts) / df[system].count()))
                host_efficiency_full_load.append(global_perf[system] /
                                                 host_perf * 100)
                efficiency[system] = Series(host_efficiency_full_load,
                                            index=[mode_text])

            cpu_eff = DataFrame(efficiency)
            print_perf(1, 2, cpu_eff.transpose()[mode_text], cpu_eff, mode,
                       mode_text, consistent, curious, unstable)
            prepare_detail(detail_options, group_number, mode, mode_text,
                           details, matched_category)
            print_detail(detail_options, details, cpu_eff, matched_category)
            print_summary("CPU Efficiency", consistent, "consistent", '%',
                          cpu_eff)
            print_summary("CPU Efficiency", curious, "curious", '%', cpu_eff)
            print_summary("CPU Efficiency", unstable, "unstable", '%',
                          cpu_eff)
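# A minimal sketch of the Series-per-system pattern used above: each system
# maps to a Series indexed by metric name, and DataFrame(results) aligns
# them into one metrics-by-systems table. Hosts and numbers are made up.
from pandas import DataFrame, Series

results = {
    "host-1": Series([5200.0, 5185.5], index=["cpu0_loops", "cpu1_loops"]),
    "host-2": Series([5210.3, 4990.1], index=["cpu0_loops", "cpu1_loops"]),
}
df = DataFrame(results)
print(df)                      # rows = metrics, columns = systems
print(df.transpose().columns)  # metric names, as iterated in cpu_perf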
"2016-09-02T00:00:00.000000000":"2016-09-08T00:00:00.000000000"].idxmax() JuliaPeak_t18 = df_t18.loc[ "2016-09-18T00:00:00.000000000":"2016-09-25T00:00:00.000000000"].max() JuliaPeak_t18_time = df_t18.loc[ "2016-09-18T00:00:00.000000000":"2016-09-25T00:00:00.000000000"].idxmax() MatthewPeak_t18 = df_t18.loc[ "2016-10-07T00:00:00.000000000":"2016-10-14T00:00:00.000000000"].max() MatthewPeak_t18_time = df_t18.loc[ "2016-10-07T00:00:00.000000000":"2016-10-14T00:00:00.000000000"].idxmax() peaks_values = DataFrame([ HerminePeak_t1, JuliaPeak_t1, MatthewPeak_t1, HerminePeak_t9, JuliaPeak_t9, MatthewPeak_t9, HerminePeak_t18, JuliaPeak_t18, MatthewPeak_t18 ]) peaks_values = peaks_values.transpose() peaks_values.columns = [ 'HerminePeak_t1', 'JuliaPeak_t1', 'MatthewPeak_t1', 'HerminePeak_t9', 'JuliaPeak_t9', 'MatthewPeak_t9', 'HerminePeak_t18', 'JuliaPeak_t18', 'MatthewPeak_t18' ] peak_times = DataFrame([ HerminePeak_t1_time, JuliaPeak_t1_time, MatthewPeak_t1_time, HerminePeak_t9_time, JuliaPeak_t9_time, MatthewPeak_t9_time, HerminePeak_t18_time, JuliaPeak_t18_time, MatthewPeak_t18_time ]) peak_times = peak_times.transpose() peak_times.columns = [ 'HermineTime_t1', 'JuliaTime_t1', 'MatthewTime_t1', 'HermineTime_t9',
import pandas as pd


def pfa_coef_counts(coef: pd.DataFrame):
    # Drop the factor label, flip to one column per factor, and fix the
    # row order before converting to a numeric array.
    coef = coef.drop(columns=['factor'])
    coef = coef.transpose()
    coef = coef.reindex(["correct_coef", "incorrect_coef", "intercept"])
    # Prepend a fixed indicator column for the three coefficient rows.
    coef.insert(0, column="cor", value=[1., 0., 1.])
    return coef.to_numpy()
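# A hypothetical call with made-up numbers; the column names follow the
# reindex targets above, and 'skill_a'/'skill_b' are illustrative factors.
import pandas as pd

coef = pd.DataFrame({
    "factor": ["skill_a", "skill_b"],
    "correct_coef": [0.30, 0.25],
    "incorrect_coef": [-0.10, -0.05],
    "intercept": [1.20, 0.80],
}, index=["skill_a", "skill_b"])
print(pfa_coef_counts(coef))
# -> 3x3 array: rows correct_coef / incorrect_coef / intercept; the first
#    column is the fixed [1., 0., 1.] indicator, then one column per factor.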