def bankrisk(innercode, checkdate): innercodeCalc, result = ca.reshape_innercode(innercode=innercode, checkdate=checkdate, cache=ca.bankRiskCache) #innercodeCalc=innercode result = df(result,columns=('innercode', 'es')) if len(innercodeCalc) != 0: query="""SELECT financedata.bank_fin_prd.INNER_CODE, financeprd.bank_risk.VAR FROM financedata.bank_fin_prd, financeprd.bank_risk WHERE financedata.bank_fin_prd.INNER_CODE IN ( %s ) AND financeprd.bank_risk.BANK_ID = financedata.bank_fin_prd.BANK_ID """ % ','.join(innercodeCalc) result_new = GetDataFromDB(config, query) result_new.columns = ['innercode', 'es'] innercode_empty=df(columns=['innercode', 'es']) innercode_empty['innercode']=list(set(innercodeCalc)-set(result_new['innercode'].astype(str))) innercode_empty['es']='' innercode_empty.columns=['innercode','es'] result_new=result_new.append(innercode_empty,ignore_index=1) #save cache for new data ca.set_cache(result_new=result_new, checkdate=checkdate,cache=ca.bankRiskCache) result=result.append(result_new,ignore_index=1) result['checkdate'] = checkdate else: result['checkdate'] = checkdate return (result.to_json(orient='index'))
def __init__(self, niimg, xy=None, alpha=.5, ch_user=None, fit=True, cmap='magma'): from nilearn import _utils if isinstance(niimg, str): niimg = _utils(niimg) # Store params self.fit = fit self.alpha = alpha self.xy = xy # Read MRI data data = niimg.get_data() affine = niimg.affine # Setup MRI viewer self.axes = list() # ---- Horizontally stack the three views widths = np.array(np.abs([affine[1, -1], affine[0, -1], affine[0, -1]]), int) cum_width = 0 for ii, width in enumerate(widths): ax = plt.subplot2grid((1, sum(widths)), (0, cum_width), colspan=width) self.axes.append(ax) cum_width += width plt.subplots_adjust(hspace=0, wspace=0, left=0, right=1.0, top=1., bottom=0.1) self.viewer = OrthoSlicer3D(data, affine=affine, cmap=cmap, axes=self.axes) # ---- Each view has a particular x, y, depth order self.ax_xyd = ([1, 2, 0], [0, 2, 1], [0, 1, 2]) # Interactive components ax.get_figure().canvas.mpl_connect('key_press_event', self._press) # channel locations columns = (['handle_%i' % ii for ii in range(3)] + ['x', 'y', 'z', 'x_idx', 'y_idx', 'z_idx']) self.ch_user = df(columns=columns) if ch_user is None else ch_user self.ch_pred = df(columns=columns) # Scatter plot handles plt.gcf().canvas.mpl_connect('motion_notify_event', self._draw) self._last_refresh = time() # XXX to refresh every 100 ms max # Grid GUI self.grid = GridGUI(ax=plt.axes([0.1, 0., 0.1, 0.1]), xy=xy) # Setup surface fitting self.model = ModelSurface(alpha=alpha, verbose=None) self._init_add() plt.show()
def process_lock(args,junction): global table name = args.replace("/","_") dir = "pickle/" + name + "/" path = "C:/Users/IBM_ADMIN/Documents/Disseration/Figures/" data = df().from_csv(dir + "data.csv") result = db.peak_weekday.find({"_id":args}).sort("_id", -1) hour = [0,0] global weatherstore loc = df() data["IDUBLINC2_dailyrainMM"] = weatherstore["IDUBLINC2"]["dailyrainMM"] data["IDUBLINC2_TemperatureC"] = weatherstore["IDUBLINC2"]["TemperatureC"] point = wd.stations_coordinates("IDUBLINC2") loc["IDUBLINC2_distance"] = jh.distance_between_junction(junction, point) data["ILEINSTE8_dailyrainMM"] = weatherstore["ILEINSTE8"]["dailyrainMM"] data["ILEINSTE8_TemperatureC"] = weatherstore["ILEINSTE8"]["TemperatureC"] point = wd.stations_coordinates("ILEINSTE8") loc["ILEINSTE8_distance"] = jh.distance_between_junction(junction, point) data["ICODUBLI2_dailyrainMM"] = weatherstore["ICODUBLI2"]["dailyrainMM"] data["ICODUBLI2_TemperatureC"] = weatherstore["ICODUBLI2"]["TemperatureC"] point = wd.stations_coordinates("ICODUBLI2") loc["ICODUBLI2_distance"] = jh.distance_between_junction(junction, point) data = data.dropna() for res in result: h = res["item"][0]["hour"] hour[0] = str(h) + ":00" hour[1] = str((h)) + ":59" try: data = data.between_time(hour[0],hour[1]).resample('B').dropna() except Exception: data = data.resample('B').dropna() ts1 = pd.Series(data["STT"].values.squeeze()) ts_rain = {} #ts_temp = {} ts_rain["IDUBLINC2_dailyrainMM"] = pd.Series(data["IDUBLINC2_dailyrainMM"].values.squeeze()) ts_rain["IDUBLINC2_TemperatureC"] = pd.Series(data["IDUBLINC2_TemperatureC"].values.squeeze()) ts_rain["ILEINSTE8_dailyrainMM"] = pd.Series(data["ILEINSTE8_dailyrainMM"].values.squeeze()) ts_rain["ILEINSTE8_TemperatureC"] = pd.Series(data["ILEINSTE8_TemperatureC"].values.squeeze()) ts_rain["ICODUBLI2_dailyrainMM"] = pd.Series(data["ICODUBLI2_dailyrainMM"].values.squeeze()) ts_rain["ICODUBLI2_TemperatureC"] = pd.Series(data["ICODUBLI2_TemperatureC"].values.squeeze()) global columns #result = autocorrelation_plot(ts1) row = [np.corrcoef(ts1.values,ts_rain[a].values)[0,1] for a in ts_rain.keys()] table.append(row); columns=ts_rain.keys() records.append(args.replace("/","_"))
def extract_linear_regression_data(filepath):
    # Read the csv into a pandas DataFrame,
    # insert a column of ones into x,
    # take the furthest-right column as y,
    # and initialize theta to zeros.
    data = pd.read_csv(filepath, header=None)
    x = df(data[list(data.columns)[:-1]]).astype('float')
    x.insert(0, None, 1)                  # bias column of ones
    x.columns = range(len(x.columns))
    y = df(data[data.shape[1] - 1]).astype('float')
    y.columns = [0]
    theta = df(0, index=range(len(x.columns)), columns=range(1)).astype('float')
    return x, y, theta
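# A minimal usage sketch for extract_linear_regression_data, under the assumptions that
# pandas is imported as pd, DataFrame is aliased as df, and the CSV name is hypothetical.
import pandas as pd
from pandas import DataFrame as df

df([[1.0, 2.0, 3.0],
    [4.0, 5.0, 6.0]]).to_csv('toy_linreg.csv', header=False, index=False)

x, y, theta = extract_linear_regression_data('toy_linreg.csv')
print(x.shape, y.shape, theta.shape)   # (2, 3) (2, 1) (3, 1)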
def unstack_me(fid, cols, dcols, outfile):
    """Unstacks data and writes to file"""
    data = pd.read_excel(fid)
    unstacked = df()
    leaders = df(data, columns=cols)
    for dc in dcols:
        temp = df(leaders)
        temp['Analyte'] = dc
        temp['Result'] = data[dc]
        # temp['Result'] = temp.Result.apply(fixred)
        unstacked = unstacked.append(temp, ignore_index=True)
    excelwriter(outfile, unstacked)
def unstack_frame(frame, cols, dcols):
    """Unstacks dataframe and returns unstacked df

    cols: columns that will be repeated with sample info
    dcols: data columns for unstacking that will be individually appended to cols
    """
    data = frame
    unstacked = df()
    leaders = df(data, columns=cols)
    for dc in dcols:
        temp = df(leaders)
        temp['Analyte'] = dc
        temp['Result'] = data[dc]
        # temp['Result'] = temp.Result.apply(fixred)
        unstacked = unstacked.append(temp, ignore_index=True)
    return unstacked
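# A hedged usage sketch for unstack_frame: a wide table with one column per analyte is
# reshaped to long format. Column names here are hypothetical, and pandas < 2.0 is
# assumed because the helper relies on DataFrame.append.
from pandas import DataFrame as df

wide = df({'SampleID': ['S1', 'S2'],
           'Lead': [0.1, 0.2],
           'Zinc': [1.5, 1.7]})
long_frame = unstack_frame(wide, cols=['SampleID'], dcols=['Lead', 'Zinc'])
print(long_frame)
#   SampleID Analyte  Result
# 0       S1    Lead     0.1
# 1       S2    Lead     0.2
# 2       S1    Zinc     1.5
# 3       S2    Zinc     1.7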
def calc_probabilities(self, data):
    """
    The method returns the selection probability associated with the
    different choices.

    Inputs:
    data - DataArray object
    """
    [shape_param] = [1, ] * genlogistic.numargs
    observed_utility = self.calc_observed_utilities(data)
    num_choices = self.specification.number_choices
    probabilities = df(columns=self.choices, index=data.index)
    lower_bin = 0
    for i in range(num_choices - 1):
        value = self.thresholds[i] - observed_utility
        if self.distribution == 'logit':
            upper_bin = genlogistic.cdf(value, shape_param)
        else:
            upper_bin = norm.cdf(value)
        choice = self.choices[i]
        probabilities.loc[:, choice] = upper_bin - lower_bin
        lower_bin = upper_bin
    choice = self.choices[i + 1]   # Last ordered choice
    probabilities.loc[:, choice] = 1 - upper_bin
    return probabilities
def run():
    cursor = db.junctions.find({"_id": "30/7/1"})
    json_str = json_util.dumps(cursor)
    junctions = json_util.loads(json_str)
    junctions = sorted(junctions, key=lambda k: k['route'])
    d1 = None
    for junction in junctions:
        d1 = process(junction["_id"])

    cursor = db.junctions.find({"_id": "13/2/1"})
    json_str = json_util.dumps(cursor)
    junctions = json_util.loads(json_str)
    junctions = sorted(junctions, key=lambda k: k['route'])
    d2 = None
    for junction in junctions:
        d2 = process(junction["_id"])

    cursor = db.junctions.find({"_id": "17/6/1"})
    json_str = json_util.dumps(cursor)
    junctions = json_util.loads(json_str)
    junctions = sorted(junctions, key=lambda k: k['route'])
    d3 = None
    for junction in junctions:
        d3 = process(junction["_id"])
    # d3.plot()

    d = df({
        "Low - 30/7/1": [d1["STT"].quantile(i / 100) for i in range(1, 99, 10)],
        "Medium - 13/2/1": [d2["STT"].quantile(i / 100) for i in range(1, 99, 10)],
        "High - 17/6/1": [d3["STT"].quantile(i / 100) for i in range(1, 99, 10)]
    })
    d.plot(ylim=[0, 600])
    plt.show()
def main():
    # create the training & test sets, skipping the header row with [1:]
    dataset = genfromtxt('D:/train.csv', delimiter=',', dtype=np.str_)[1:]
    print('loaded train')
    target = [x[1] for x in dataset]
    train = [x[2:5].tolist() + x[14:].tolist() for x in dataset]
    # Clear the variable to release the memory.
    dataset = None

    dataset = genfromtxt('D:/test.csv', delimiter=',', dtype=np.str_)[1:]
    test_id = [x[0] for x in dataset]
    test = [x[1:4].tolist() + x[13:].tolist() for x in dataset]
    print('loaded test')

    # create and train the random forest
    rf = RandomForestClassifier(n_estimators=100, n_jobs=16)
    rf.fit(train, target)
    print("done fitting")

    columns_obj = ["id", "click"]
    list_obj = list(zip(test_id, rf.predict(test).tolist()))
    df_obj = df(list_obj, columns=columns_obj)
    savetxt('D:/submission.csv', df_obj, delimiter=',', fmt='%s')
    print("prediction complete")
def process_lock(args): cursor = db.junctions.find({"_id":args}) rain = getseriesweather('dailyrainMM',"mean","IDUBLINC2") wind = getseriesweather('WindSpeedGustKMH',"mean","IDUBLINC2") temperature = getseriesweather('TemperatureC',"mean","IDUBLINC2") #json_str =json_util.dumps(cursor) #junctions =json_util.loads(json_str) #neighbours1 = list(db.junctions.find({"junction2.point":junctions[0]["junction1"]["point"]})) #neighbours2 = list(db.junctions.find({"junction1.point":junctions[0]["junction2"]["point"]})) series = [] neighbours = [] #neighbours.extend(neighbours1) #neighbours.extend(neighbours2) #print("1",neighbours1) #print("2",neighbours2) arg = args.split("/") series1 = {"route":arg[0],"link":arg[1],"direction":arg[2]} selected_series = getseries(series1) #for n in neighbours: #if not n["direction"] == series1["direction"]: #if not (n["route"] + "/" + n["link"]) == (series1["route"] + "/" + series1["link"]): #series.append(getseries({"route": n["route"],"link": n["link"],"direction": n["direction"]})) shift = 10*6*24 ds = {"STT":selected_series} dframe = df(ds)
def getseriesweather(location): global data print(data) if data is None: data = db.weather.find({"location":location}).sort("_id", -1) #json_str =json_util.dumps(data) #data =json_util.loads(json_str) dates = [] dailyrainMM = [] windSpeedGustKMH = [] Humidity = [] HourlyPrecipMM = [] TemperatureC = [] for res in (data): for res2 in (res['item']): dailyrainMM.append(float(res2["dailyrainMM"])) windSpeedGustKMH.append(float(res2["WindSpeedGustKMH"])) Humidity.append(float(res2["Humidity"])) HourlyPrecipMM.append(float(res2["HourlyPrecipMM"])) TemperatureC.append(float(res2["TemperatureC"])) dates.append(datetime.strptime(res2['Time'],'%Y-%m-%d %H:%M:%S')) dframe = df({"dailyrainMM":TimeSeries(dailyrainMM,dates), "WindSpeedGustKMH":TimeSeries(windSpeedGustKMH,dates), "Humidity":TimeSeries(Humidity,dates), "HourlyPrecipMM":TimeSeries(HourlyPrecipMM,dates), "TemperatureC":TimeSeries(TemperatureC,dates)}) dframe.to_csv("c:/result.csv") return dframe
def create_submission(events, yw, yb):
    print("Preparing submission file....")
    submission = df()
    submission["Event"] = events
    submission["WhiteElo"] = yw
    submission["BlackElo"] = yb
    submission.to_csv("submission.csv", index=False)
def get_grid(self, astype='table'):
    from pandas import DataFrame as df
    geoms = self.geometries().keys()
    phases = [p.name for p in self.phases().values() if not hasattr(p, 'mixture')]
    grid = df(index=geoms, columns=phases)
    for r in grid.index:
        for c in grid.columns:
            phys = self.find_physics(phase=self[c], geometry=self[r])
            if phys is not None:
                grid.loc[r][c] = phys.name
            else:
                grid.loc[r][c] = '---'
    if astype == 'pandas':
        pass
    elif astype == 'dict':
        grid = grid.to_dict()
    elif astype == 'table':
        from terminaltables import SingleTable
        headings = [self.network.name] + list(grid.keys())
        g = [headings]
        for row in list(grid.index):
            g.append([row] + list(grid.loc[row]))
        grid = SingleTable(g)
        grid.title = 'Project: ' + self.name
        grid.padding_left = 3
        grid.padding_right = 3
        grid.justify_columns = {col: 'center' for col in range(len(headings))}
    elif astype == 'grid':
        grid = ProjectGrid()
    return grid
def recursive_add_consumers(consumer_id, seen=set([])):
    if consumer_id is None:
        return
    seen.add(consumer_id)
    consumer_key = sample[sample.Consumer == consumer_id]
    IP = df.drop_duplicates(df(consumer_key.IP))
    n = np.array(np.arange(len(IP)))
    IP_Map = set([])
    for i in n:
        value = sample[sample.IP.isin([IP.iloc[i, 0]])]
        IP_Map.add(value)
    # print(IP_Map)
    print(consumer_id)
    print(seen)
    consumer_list = []   # list of unique consumers that are linked to this one
    [consumer_list.extend(y.Consumer.iloc[l].tolist())
     for l in [range(len(y.Consumer)) for y in IP_Map]]
    # print(consumer_list)
    # print([x for x in set(consumer_list).difference([consumer_id])])
    # unique_consumer_list = []
    # print([x for x in set([y.Consumer.iloc[0] for y in IP_Map])])
    # tuples of ips and unique consumers attached to them
    print([(y.IP.iloc[0], set(y.Consumer.iloc[l].tolist()))
           for l in [range(len(y.Consumer)) for y in IP_Map]])
def data_result(args):
    name = args.replace("/", "_")
    dir = "pickle/" + name + "/"
    path = "C:/Users/IBM_ADMIN/Documents/Disseration/Figures/"
    data = df().from_csv(dir + "data.csv")
    data = data.resample('B').dropna()
    return data
def to_dataframe(data_array):
    dataframe = df(data_array)
    dataframe.columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    dataframe['datetime'] = dataframe.timestamp.apply(
        lambda x: pandas.to_datetime(datetime.fromtimestamp(x / 1000).strftime('%c')))
    dataframe.set_index('datetime', inplace=True, drop=True)
    dataframe.drop('timestamp', axis=1, inplace=True)
    return dataframe
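# Hedged usage sketch for to_dataframe: the rows are assumed to be millisecond-timestamp
# OHLCV candles, and the sample values below are made up.
import pandas
from datetime import datetime
from pandas import DataFrame as df

candles = [
    [1704067200000, 100.0, 101.5, 99.5, 101.0, 1200.0],
    [1704070800000, 101.0, 102.0, 100.5, 101.8, 900.0],
]
ohlcv = to_dataframe(candles)
print(ohlcv[['open', 'close']])   # indexed by the converted datetime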
def prepare_for_pawn(source_df, cols_to_include):
    to_handle = source_df.copy(deep=True)
    to_handle = to_handle[to_handle["ActionType_Pawn"].notnull()]
    cols_to_merge = ["Matter_Tablet", "Matter_Phone"]
    join_cols(to_handle, cols_to_merge)
    cols_to_merge = ["Matter_Other", "Matter_TV"]
    join_cols(to_handle, cols_to_merge)
    return df(to_handle, columns=cols_to_include)
def IP_Weight_Calc(consumer_id):
    if consumer_id is None:
        return
    consumer_key = sample[sample.Consumer == consumer_id]
    IP = df.drop_duplicates(df(consumer_key.IP))
    n = np.array(np.arange(len(IP)))
    IP_Weight_List = []
    for i in n:
        value = sample[sample.IP.isin([IP.iloc[i, 0]])]
        value2 = len(df.drop_duplicates(df(value.Consumer)))
        value3 = 1 / (value2 ** 2)
        IP_Weight_List.append(value3)
    return sum(IP_Weight_List)
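# Hedged illustration of the 1/n**2 weighting on a made-up `sample` frame (Python 3
# division assumed): an IP shared by n distinct consumers contributes 1/n**2, so
# IPs used exclusively by one consumer dominate the score.
import numpy as np
from pandas import DataFrame as df

sample = df({'Consumer': ['c1', 'c1', 'c2', 'c3'],
             'IP':       ['ip_a', 'ip_b', 'ip_b', 'ip_b']})

# c1 uses ip_a (1 consumer -> 1/1) and ip_b (3 consumers -> 1/9)
print(IP_Weight_Calc('c1'))   # ~1.11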
def get_beta(self):
    # Run the Johansen cointegration test and return the first eigenvector,
    # keyed by instrument.
    data = df()
    for i in range(len(self.__instrumentList)):
        data[self.__instrumentList[i]] = np.asarray(
            self.__ds[self.__instrumentList[i]][-1 * self.__windowSize:])
    jres = johansen.coint_johansen(data[self.__instrumentList], 0, 1)
    result = dict()
    for i in range(len(self.__instrumentList)):
        result[self.__instrumentList[i]] = jres.evec[i, 0]
    return result
def process_lock(args): cursor = db.junctions.find({"_id":args}) rain = getseriesweather('dailyrainMM',"mean","IDUBLINC2") wind = getseriesweather('WindSpeedGustKMH',"mean","IDUBLINC2") temperature = getseriesweather('TemperatureC',"mean","IDUBLINC2") #json_str =json_util.dumps(cursor) #junctions =json_util.loads(json_str) #neighbours1 = list(db.junctions.find({"junction2.point":junctions[0]["junction1"]["point"]})) #neighbours2 = list(db.junctions.find({"junction1.point":junctions[0]["junction2"]["point"]})) series = [] neighbours = [] #neighbours.extend(neighbours1) #neighbours.extend(neighbours2) #print("1",neighbours1) #print("2",neighbours2) arg = args.split("/") series1 = {"route":arg[0],"link":arg[1],"direction":arg[2]} selected_series = getseries(series1) #for n in neighbours: #if not n["direction"] == series1["direction"]: #if not (n["route"] + "/" + n["link"]) == (series1["route"] + "/" + series1["link"]): #series.append(getseries({"route": n["route"],"link": n["link"],"direction": n["direction"]})) shift = 10*6*24 ds = {"STT":selected_series, "Wind":wind, "Rain":rain, "Temperature":temperature, "STT1":ewma(selected_series.shift(shift), span=1+shift), "STT2":ewma(selected_series.shift(shift), span=2+shift), "STT3":ewma(selected_series.shift(shift), span=3+shift), "Wind1":ewma(wind.shift(shift), span=3+shift), "Temperature1":ewma(temperature.shift(shift), span=3+shift), "Rain1":ewma(rain.shift(shift), span=3+shift) } dframe = df(ds) t_list = list(['STT1','STT2','STT3','Rain','Wind','Temperature','Rain1','Wind1','Temperature1']) for i,s in enumerate(dframe.columns.values): dframe[s].fillna(method="pad", inplace=True) train_df = dframe["2013-01-01":"2014-04-17"].resample('H', how="max",convention='end',fill_method='pad').copy() test_df = train_df.copy()["2013-09-01":"2014-02-15"] train_df = train_df.copy()["2013-02-16":"2014-04-17"] train_df.to_csv("pickle/" + args.replace("/","_") + "/" + "training_data.csv") test_df.to_csv("pickle/" + args.replace("/","_") + "/" + "testing_data.csv")
def append_data(self, df_obj):
    """Appends dataframe obj to ScrapeData data

    Should be a setter, need to refactor"""
    try:
        df_obj = self.__setup_data(df_obj)
        cols = self.data.columns
        for_import = df(df_obj, columns=cols)
        return self.data.append(for_import, ignore_index=True)
    except KeyError:
        print('Something is non-standard, prep data manually')
def resolve_consistency(self, data, seed, numberProcesses):
    pschedulesGrouped = data.data.groupby(level=[0, 1], sort=False)
    verts = df(columns=self.colNames)
    verts[self.hidName] = pschedulesGrouped[self.hidName].min()
    verts[self.pidName] = pschedulesGrouped[self.pidName].min()
    verts[self.starttimeName] = pschedulesGrouped[self.starttimeName].min()
    verts[self.endtimeName] = pschedulesGrouped[self.endtimeName].max()
    return DataArray(verts.values, self.colNames,
                     indexCols=[self.hidName, self.pidName])
def run():
    cursor = db.junctions.find()
    json_str = json_util.dumps(cursor)
    junctions = json_util.loads(json_str)
    junctions = sorted(junctions, key=lambda k: k['route'])
    route = [int(j["route"]) for j in junctions]
    link = [int(j["link"]) for j in junctions]
    direction = [int(j["direction"]) for j in junctions]
    d = df({"route": route, "link": link, "direction": direction})
    print(d.describe())
def process(args):
    # ['ICODUBLI2','ILEINSTE8','IDUBLINC2']
    dframe1 = getseriesweather("ICODUBLI2")["2014-01-24":"2014-01-29"].resample(
        "H", how="mean", convention='end', fill_method="pad")
    dframe2 = getseriesweather("ILEINSTE8")["2014-01-24":"2014-01-29"].resample(
        "H", how="mean", convention='end', fill_method="pad")
    dframe3 = getseriesweather("IDUBLINC2")["2014-01-24":"2014-01-29"].resample(
        "H", how="mean", convention='end', fill_method="pad")
    dframe = df({"ICODUBLI2": dframe1, "ILEINSTE8": dframe2, "IDUBLINC2": dframe3})
    dframe.plot()
    print(dframe.corr())
    plt.show()
def __init__(self, data=None, varnames=None, index=None, indexCols=None):
    # TODO: index
    if varnames is not None:
        for varname in varnames:
            self.check_varname(varname)
    else:
        varnames = []
    try:
        self.data = df(data, columns=varnames, index=index)
    except Exception as e:
        raise DataError("""Error creating the data frame object with the dataset:%s""" % e)
def run(self, flatten=True):
    if not self.quiet:
        print("running with collection: %s, query: %s, project: %s" %
              (str(self.col), self.query, self.project))
    c = self.col.find(self.query, self.project)
    self.res = []
    for item in c:
        if flatten:
            self.res.append(flatten_dict(item))
        else:
            self.res.append(item)
    self.res = df(self.res)
    self.__cleanup()
def get_neighbour(self, id):
    if self.matrix == "":
        cursor = self.db.junctions.find()
        json_str = json_util.dumps(cursor)
        junctions = json_util.loads(json_str)
        junctions = sorted(junctions, key=lambda k: k['route'])
        matrix = []
        headers = [y['_id'] for y in junctions]
        for x in junctions:
            matrix.append([int(jh.is_neighbour(x, y)) for y in junctions])
        matrix = df(matrix, columns=headers, index=headers)
    return matrix[id]
def run():
    cursor = db.junctions.find()
    json_str = json_util.dumps(cursor)
    junctions = json_util.loads(json_str)
    # os.remove(r'weather_correlation_lagged.csv')
    junctions = sorted(junctions, key=lambda k: k['route'])
    for junction in junctions[:4]:
        process(junction["_id"], junction)
    data = df(table, columns=columns)
    data["_index"] = records
    data.dropna().to_csv("spatial_correlation_lagged.csv")
def process_lock(args):
    print(args)
    dir = args.split("/")
    pickle_dir = "pickle/" + args.replace("/", "_")
    if not os.path.exists(pickle_dir):
        os.makedirs(pickle_dir)
    series1 = {"route": dir[0], "link": dir[1], "direction": dir[2]}
    selected_series = getseries(series1)
    ds = {"STT": selected_series}
    dframe = df(ds)
    dframe.to_csv(pickle_dir + "/" + "data.csv")
def prepare_df(X, Y, output_mode="binary", pred_hours=[8], min_ob_window=4):
    X_out, y_out, hours_left = cut_data(X, output_mode=output_mode,
                                        pred_hours=pred_hours,
                                        min_ob_window=min_ob_window)
    print(len(X_out), len(hours_left), len(y_out))
    out_df = df({'X': X_out,
                 'hours_pass': [h.shape[0] for h in X_out],
                 # 'ylos': Y,
                 'hours_left': hours_left,
                 'y': y_out})  # .sort_values(by='hours')
    return out_df
tail = True
head = True

# Fill the price list
for link in bsObject.find_all("span", {"class": "text-muted small"}):
    value = int(link.text.strip().replace('(', "").replace(',', "").replace('원)', ""))
    # Compute the total price
    if i == 1:
        value = value * 2
    elif i == 2:
        value = value * 3
        if value % 10 == 1:
            value = value - 1
        elif value % 10 == 9:
            value = value + 1
    elif i == 3:
        value = value * 4
    elif i == 4:
        value = value * 5
    price_list.append(value)

# Create the dataframe
db = df(data={'Product': product_list, 'Sale': sale_list, 'Price': price_list})

# Convert to csv
db.to_csv(csv_name + ".csv", mode="w", header=False, index=False, encoding='utf-7')
''' data = { 'id': ['a1', 'a2', 'a3', 'a4', 'a5'], 'x1': [1, 2, 3, 4, 5], 'x2': [3.0, 4.5, 3.2, 4.0, 3.5] } #df=DataFrame(data) #print(df) #df=DataFrame(data) #df.index=df['id'] #df.pop('id') #print(df) df_1 = df(data=np.arange(12).reshape(3, 4), index=['r0', 'r1', 'r2'], dtype='int', columns=['c0', 'c1', 'c2', 'c3']) df_2 = df( { 'class_1': ['a', 'a', 'b', 'b', 'c'], 'var_1': np.arange(5), 'var_2': np.random.randn(5) }, index=['r0', 'r1', 'r2', 'r3', 'r4']) #print(df_2) #print(df_2.columns) #print(df_2[['class_1', 'var_2']]) idx = ['r0', 'r1', 'r2', 'r3', 'r4'] df_1 = df({
# Format = [ # ['H','F','F'etc.], (listes_genre) # [0, 1, 2, etc.] (femmes_counter_array) # [0.0, 0.5, 0.67, etc.] (proportion_counter) # ] for parti_liste in listes_genre: femmes_counter_array = [] proportion_counter = [] femmes_counter = 0 for i in range(len(parti_liste)): if parti_liste[i] == 'F': femmes_counter += 1 femmes_counter_array.append(femmes_counter) proportion_counter.append(femmes_counter / (i+1)) listes_genre_et_counter.append((parti_liste, femmes_counter_array,proportion_counter)) partis_dfs = [] for i in range(len(listes_genre_et_counter)): parti_tableau = {} parti_grosses_listes = listes_genre_et_counter[i] for j in range(len(parti_grosses_listes)): categorie_liste = parti_grosses_listes[j] nom_colonne = (' '.join([categories[j],headers[i]])) parti_tableau[nom_colonne] = categorie_liste partis_dfs.append(df(parti_tableau)) import pandas as pd t = pd.concat(partis_dfs, axis=1) t['Classement des circonscriptions'] = range(1, len(t) + 1) t.set_index('Classement des circonscriptions').to_csv('../gt.csv')
def make_beam(self, xi_distr, r_distr, pz_distr, ang_distr, Ipeak_kA, q_m=1.0, partic_in_layer=200, saveto='./', name='beamfile.bin'): """make_beam(xi_shape, r_shape, pz_shape, ang_shape, Ipeak_kA, N_partic=10000, q_m=1.0, partic_in_layer = 200, saveto='./')""" if q_m == 1 and Ipeak_kA > 0: print('Electrons must have negative current.') return if xi_distr.med > 0: print('Beam center is in xi>0.') try: partic_in_layer = self.beam_partic_in_layer except: print('Variable partic_in_layer is not found. Default value: 200.') try: xi_step = self.xi_step r_size = self.r_size except: xi_step = 0.01 r_size = 10 print( 'Variable xi_step or r_size is not found. Default values: xi_step = %.3f, r_size = %3f' % (xi_step, r_size)) if saveto and 'beamfile.bin' in os.listdir(saveto): print( 'Another beamfile.bin is found. You may delete it using the following command: "!rm %s".' % os.path.join(saveto, name)) return I0 = 17 # kA q = 2. * Ipeak_kA / I0 / partic_in_layer stub_particle = np.array([[-100000., 0., 0., 0., 0., 1.0, 0., 0.]]) gamma = pz_distr.med N = 10000 while True: xi = xi_distr(N) print('Trying', N, 'particles') xi = xi[(-self.xi_size <= xi)] # & (xi <= 0)] if np.sum((xi_distr.med - xi_step / 2 < xi) & (xi < xi_distr.med + xi_step / 2)) < partic_in_layer: print( N, 'is not enough:', np.sum((xi_distr.med - xi_step < xi) & (xi < xi_distr.med))) N *= 10 continue until_middle_layer_filled = [ np.cumsum((xi_distr.med - xi_step < xi) & (xi < xi_distr.med)) <= partic_in_layer ] xi = xi[until_middle_layer_filled] K = xi.shape[0] print(K, 'is enough') xi = np.sort(xi)[::-1] r = np.abs(r_distr(K)) pz = pz_distr(K) pr = gamma * ang_distr(K) M = gamma * ang_distr(K) * r particles = np.array([ xi, r, pz, pr, M, q_m * np.ones(K), q * np.ones(K), np.arange(K) ]) beam = np.vstack([particles.T, stub_particle]) break beam = df(beam, columns=['xi', 'r', 'pz', 'pr', 'M', 'q_m', 'q', 'N']) head = beam[beam.eval('xi>0')] beam = beam[beam.eval('xi<=0')] #beam.sort_values('xi', inplace=True, ascending=False) if saveto: beam.values.tofile(os.path.join(saveto, name)) head.values.tofile(os.path.join(saveto, 'head-' + name)) return beam
# The tfidfvector trained on the person's corpus contains terms by index
p_vector = persons_vocab[person]['tfidfvec']

# The sparse term matrix resulting from vector training contains term weights by index.
# Sparse term matrices are defined by an array where row and column coordinates of only
# those cells with values are known:
#     array[k] = [row[k], col[k]] = data[k]
# In the case of a sparse matrix generated by a tfidfvector from a corpus:
#     Each row is a document
#     Each column is a term
#     Each cell is a term weight.
p_term_matrix = persons_vocab[person]['term_matrix']

# Now to just extract the terms and weights and add them to the persons_weighted_terms dictionary
for doc in persons_vocab['term_matrix']:
    # Finally, add to the list of weighted terms.
    ## vocabs_df = pd.DataFrame([{},])
    ## vocabs_df = vocabs_df.set_index('')
    # cols = word_counts.keys()
    pass

vocabs_df = pd.DataFrame()

# Standardize the data
# Perform dimensionality reduction by PCA
# Export the resulting dataframe
def create_df():
    return df(columns=('ID', 'element_type', 'time', 'x_f', 'y_f', 'z_f',
                       'x_s', 'y_s', 'z_s', 'distance', 'ceiling_lvl',
                       'profile', 'u_x', 'u_y', 'u_z', 'HRRPUA', 'alpha'))
} for i in table_columns_post], data=table_data, row_selectable='multi', fixed_rows={'headers': True}, selected_rows=[0], sort_action='native', style_cell={ 'textAlign': 'left', 'width': '40px' }, style_table={ 'height': '300px', 'overflowY': 'auto' }, style_data_conditional=discrete_background_color_bins( df(data=table_data), columns=table_columns_quintiles) + discrete_background_color_bins(df(data=table_data), columns=table_columns_places, dark_color='Greens') + [ { 'if': { 'filter_query': '{Index} < 0', # matching rows of a hidden column with the id, `id` 'column_id': 'Entry' }, 'backgroundColor': 'rgb(255,248,220)' }, { 'if': { 'column_id': 'Entry'
from bs4 import BeautifulSoup
from urllib.request import urlopen
from pandas import DataFrame as df

response = urlopen(
    'http://cu.bgfretail.com/product/productAjax.do?pageIndex=1&searchMainCategory=10&searchSubCategory=3&listType=0&searchCondition=setA&searchUseYn=N&gdIdx=0&codeParent=10&user_id=&search2=&searchKeyword='
)
soup = BeautifulSoup(response, 'html.parser')

a = []
b = []
c = []
for price in soup.select('p.prodPrice'):
    a.append(price.get_text())
    # print(price.get_text())
for name in soup.select('p.prodName'):
    b.append(name.get_text())
    # print(name.get_text())
for img_url in soup.select('img[src*=".jpg"]'):
    c.append(img_url.get('src', '/'))
    # print(img_url.get('src', '/'))

df1 = df({"prodName": list(b), "prodPirce": list(a), "img_url": list(c)})
df1.to_csv('CU_ham.csv', index=False)
listSuptNameVal.append(pcfSuptName) listSuptUciVal.append(pcfSuptUci) listSuptMtlVal.append(pcfSuptMtlList) pcfSuptName = "" pcfSuptUci = "" pcfSuptMtlList = "" iB = 0 else: iB = 0 if iA == 0: print('레퍼런스 ID가 없는 {} 파일이 존재합니다.'.format(tagerFile)) break except Exception: print('{} 파일에서 에러가 났어요.'.format(tagerFile)) data = { 'SUPPORT NAME': listSuptNameVal, #input excel coulumn name 'SUPPORT UCI': listSuptUciVal, #input excel coulumn name 'SUPPORT MATERIAL LIST': listSuptMtlVal, #input excel coulumn name 'WBS ISO': listRefVal, #input excel coulumn name 'ATTRIBUTE30': listAtt30Val, #input excel coulumn name 'ATTRIBUTE34': listAtt34Val } #input excel coulumn name pcfDF = df(data) writer = ExcelWriter('PCF_output.xlsx') pcfDF.to_excel(writer, 'PCF', index=False) writer.save() print("완료")
# # # 根据训练样本中异常样本比例,得到阈值,用于绘图 # data_1 = pd.concat([data, scores_pred_df], axis=1, join_axes=[data.index]) # print(data_1.head(3)) # print('') # # data_2 = data_1[data_1['scores_pred'] > threshold] # print("IsolationForest删除之后数据量: ", data_2.shape[0]) # print('') # data_final = data_2.drop(["scores_pred"], axis=1) # print(data_final.head(3)) # print('') # data_final.to_csv('pag_with_dummy_if1.txt', index = False) # # print(data_final.head()) pag_with_dummy_if1 = df(pd.read_csv('pag_with_dummy_if1.txt')) data_final = pag_with_dummy_if1.drop(["device_id"], axis=1) #StandardScaler crude data ss = StandardScaler() data_final_=ss.fit_transform(data_final) data_final_regular = pd.DataFrame(data_final_) # find out k SSE = [] for k in range(1, 10): estimator = KMeans(n_clusters=k) # 构造聚类器 estimator.fit(data_final_regular) SSE.append(estimator.inertia_) # estimator.inertia_获取聚类准则的总和 print("k = ", k, " SSE = ",estimator.inertia_) # print(SSE) # print(X.shape())
def prepareInputs(daydata, season, UsedInputs): nbrInputs = 0 previousHours = UsedInputs[0] previousDay = UsedInputs[1] previousWeek = UsedInputs[2] temp = UsedInputs[3] tempMax = UsedInputs[4] tempMin = UsedInputs[5] dayIndicator = UsedInputs[6] if previousHours == True: nbrInputs = nbrInputs + 1 if previousDay == True: nbrInputs = nbrInputs + 1 if previousWeek == True: nbrInputs = nbrInputs + 1 if temp == True: nbrInputs = nbrInputs + 1 if tempMax == True: nbrInputs = nbrInputs + 1 if tempMin == True: nbrInputs = nbrInputs + 1 if dayIndicator == True: nbrInputs = nbrInputs + 7 hourclusters = np.empty([(daydata.index.size * 24), 1]) hourdataindex = pd.DataFrame( index=pd.date_range('2014-1-8 00:00:00', periods=(365) * 24, freq='H')) for x in range(0, daydata.index.size): for y in range(0, 24): hourclusters[(x * 24) + y, 0] = daydata.iloc[x, 24] hourclusters.size tempAlgiers = pd.read_csv('../data/tempAlgiers.csv') tempA = tempAlgiers.loc[:, 'Hour_1':'Hour_24'] tempnp = np.array(tempA) tempnp = tempnp.reshape(-1, 1) tempdata = pd.DataFrame(tempnp) tempmax = tempAlgiers.loc[:, 'Tmax'] tempmin = tempAlgiers.loc[:, 'Tmin'] tempmx = np.random.random([tempmax.size * 24, 1]) tempmn = np.random.random([tempmin.size * 24, 1]) for x in range(0, tempmax.size): for y in range(0, 24): tempmx[(x * 24) + y, 0] = tempmax.iloc[x] for x in range(0, tempmin.size): for y in range(0, 24): tempmn[(x * 24) + y, 0] = tempmin.iloc[x] samples = daydata.index.size * 24 daydata2 = daydata.copy() del (daydata2['cluster']) data = pd.DataFrame(np.array(daydata2).reshape(-1, 1)) maxcons = data.values.max() mincons = data.values.min() maxtemp = np.max(tempdata.values) mintemp = tempdata.values.min() maxtempmax = np.max(tempmx) mintempmax = np.min(tempmx) maxtempmin = np.max(tempmn) mintempmin = np.min(tempmn) sigxx = np.empty((samples - 168, nbrInputs)) sigyy = np.empty((samples - 168, 1)) i = 0 for x in list(range(168, samples)): i = 0 if previousHours == True: sigxx[x - 168, i] = (data.iloc[x - 1, 0]) / (2 * maxcons) i = i + 1 if previousDay == True: sigxx[x - 168, i] = (data.iloc[x - 24, 0]) / (2 * maxcons) i = i + 1 if previousWeek == True: sigxx[x - 168, i] = (data.iloc[x - 168, 0]) / (2 * maxcons) i = i + 1 if temp == True: sigxx[x - 168, i] = (tempdata.iloc[x]) / (2 * maxtemp) i = i + 1 if tempMax == True: sigxx[x - 168, i] = (tempmx[x]) / (2 * maxtempmax) i = i + 1 if tempMin == True: sigxx[x - 168, i] = (tempmn[x]) / (2 * maxtempmin) i = i + 1 if dayIndicator == True: ind = 0 for y in range(0, 7): sigxx[x - 168, i + ind] = 0 ind = ind + 1 sigxx[x - 168, i + pd.datetime.weekday(hourdataindex.index[x])] = 1 for x in list(range(168, samples)): sigyy[x - 168, 0] = (data.iloc[x, 0]) / (2 * maxcons) sigmoidxx = df(sigxx.copy()) sigmoidyy = df(sigyy.copy()) sigmoidxx.index = pd.date_range('2014-1-8 00:00:00', periods=(365 - 7) * 24, freq='H') sigmoidyy.index = pd.date_range('2014-1-8 00:00:00', periods=(365 - 7) * 24, freq='H') sigmoidxx['cluster'] = hourclusters[168:] sigmoidyy['cluster'] = hourclusters[168:] dfhourclusters = df(hourclusters) temp1 = sigmoidyy[sigmoidyy.cluster == 0] temp2 = sigmoidyy[sigmoidyy.cluster == 1] temp3 = sigmoidyy[sigmoidyy.cluster == 2] if season == 'summer': if temp1.index[0] == pd.datetime(2014, 4, 9, 0, 0, 0): SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 0].copy() elif temp2.index[0] == pd.datetime(2014, 4, 9, 0, 0, 0): SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 1].copy() elif temp3.index[0] == pd.datetime(2014, 4, 9, 0, 0, 0): SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 
2].copy() elif season == 'winter': if temp1.index[0] == pd.datetime(2014, 1, 8, 0, 0, 0): SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 0].copy() elif temp2.index[0] == pd.datetime(2014, 1, 8, 0, 0, 0): SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 1].copy() elif temp3.index[0] == pd.datetime(2014, 1, 8, 0, 0, 0): SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 2].copy() elif season == 'spring and autumn': if temp1.index[0] == pd.datetime(2014, 3, 18, 0, 0, 0): SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 0].copy() elif temp2.index[0] == pd.datetime(2014, 3, 18, 0, 0, 0): SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 1].copy() elif temp3.index[0] == pd.datetime(2014, 3, 18, 0, 0, 0): SigmoidInputs = sigmoidxx[sigmoidxx.cluster == 2].copy() SigmoidOutputs = sigmoidyy[sigmoidyy.cluster == SigmoidInputs.loc[ SigmoidInputs.index[0], 'cluster']] del (SigmoidInputs['cluster'], SigmoidOutputs['cluster']) learningoutputs = pd.DataFrame( SigmoidOutputs.iloc[:int(SigmoidOutputs.size - 168)].values.copy(), index=SigmoidOutputs.iloc[:int(SigmoidOutputs.size - 168)].index) testoutputs = pd.DataFrame( SigmoidOutputs.iloc[int(SigmoidOutputs.size - 168):].values.copy(), index=SigmoidOutputs.iloc[int(SigmoidOutputs.size - 168):].index) learninginputs = pd.DataFrame( SigmoidInputs.iloc[:int(SigmoidOutputs.size - 168)].values.copy(), index=SigmoidOutputs.iloc[:int(SigmoidOutputs.size - 168)].index) testinputs = pd.DataFrame( SigmoidInputs.iloc[int(SigmoidOutputs.size - 168):].values.copy(), index=SigmoidOutputs.iloc[int(SigmoidOutputs.size - 168):].index) print('-------Input preparation process complet-------') return learninginputs, learningoutputs, testinputs, testoutputs, nbrInputs
from sklearn import tree import graphviz from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.model_selection import KFold from sklearn.model_selection import cross_val_score dataset = read_csv('realdata3.csv') modifiedData = dataset.fillna(np.NaN) print(modifiedData.head(5)) d = modifiedData d1 = fancyimpute.MICE().complete(d) newd=df(data = d1, index = d.index, columns= list(d.columns)) newd.to_csv('test2.csv') values = newd.values outcome_var = 'BAD' model = tree.DecisionTreeClassifier(criterion = "entropy", max_depth = 7, min_samples_split=500, min_samples_leaf=500) predictor_var = ['LOAN', 'MORTDUE','REASON' , 'VALUE','DELINQ', 'DEROG' ,'CLAGE','Other','DELINQ', 'Office' ,'Sales', 'ProfExe'] X = values[: , range(18)[1:]] Y = values[:,0] model = LogisticRegression() rfe = RFE(model, 6) fit = rfe.fit(X, Y) print("Num Features: %d" % fit.n_features_) print("Selected Features: %s" % fit.support_) print("Feature Ranking: %s" % fit.ranking_)
for tr in trList_1: if 'View=' + managed_entity_view_name in tr[1].text: exportID_1 = tr[2].text url_me_1 = url_managed_entity_1 + exportID_1 r_me_1 = requests.get(url_me_1) tree_me_1 = etree.fromstring(r_me_1.content, parser=parser) trList_me_1 = tree_me_1.xpath('//tr') me_app_mapping = [] for tr in trList_me_1: if len(tr.getchildren()) == 9: if len(tr.getchildren()[2].getchildren()) == 1: if tr.getchildren()[2].getchildren()[0].text != None: attr_str = tr.getchildren()[8].getchildren()[0].text if key_attribute + '=' in attr_str: for attr in attr_str.split(','): if key_attribute + '=' in attr: app_name = attr.split('=')[1].strip() else: app_name = None mapping = { 'managedEntity': tr.getchildren()[2].getchildren()[0].text, 'AppName': app_name } me_app_mapping.append(mapping) dfMapping = df(me_app_mapping) dfMapping.head()
confirmed_names = []
deaths_names = []


def names_column(frame, lst):
    # Makes a new column called Name
    for i in range(len(frame)):
        if type(frame['Province/State'][i]) is str:
            lst.append(frame['Province/State'][i])
        else:
            lst.append(frame['Country/Region'][i])
    frame['Name'] = df(lst)


names_column(confirmed, confirmed_names)
names_column(deaths, deaths_names)
# confirmed['Name'] = df(confirmed_names)
# deaths['Name'] = df(deaths_names)

confirmed['confirmed_size'] = df(confirmed_size)
deaths['death_size'] = df(death_size)

map_confirmed = go.Scattermapbox(
    customdata=confirmed[yesterdays_date],
    name='Confirmed Cases',
    lon=confirmed['Long'],
    lat=confirmed['Lat'],
    text=confirmed['Name'],
    hovertemplate=
        "<b>%{text}</b><br>" +
        "Confirmed Cases: %{customdata}<br>" +
        "<extra></extra>",
    mode='markers',
    showlegend=True,
# if last batch in laset subset and smaller than batch size, ignore if len(imlist) < config_inference.BATCH_SIZE: print("Batch is smaller than batch size (last batch?), won't do inference on it") continue # Detect objects in this batch of images r = model.detect(imlist, verbose=0) # Init df to save coords for this batch Annots_DF = df(columns= [ "unique_nucleus_id", "slide_name", "nucleus_label", "nucleus_label_confidence", "fov_offset_xmin_ymin", "roi_offset_xmin_ymin", "center_relative_to_slide_x_y", "bounding_box_relative_to_slide_xmin_ymin_xmax_ymax", "boundary_relative_to_slide_x_coords", "boundary_relative_to_slide_y_coords", ]) # ================================================================== # Save coords for all instances in each image in batch for imidx in range(len(r)): # Extract image info iminfo = dataset.image_info[idx_start:idx_end][imidx] # convert to three channels
        '과학': 70
    }, {
        '국어': 63,
        '영어': 60,
        '수학': 31,
        '과학': None
    }, {
        '국어': 23,
        '영어': 48,
        '수학': None,
        '과학': 69
    }]

# The dictionary keys become the column names.
# When using a dictionary whose values are lists, the index is specified separately.
data = df(grade_dic, index=['철수', '영희', '민철', '수현', '호영'])
print(data)
print('-' * 40)

# 1. Single condition
#    => uses a basic comparison expression
# Find students with a Korean score over 80
result = data.query('국어 > 80')
print(result)
print('-' * 40)

# 2. Using an `and` condition
# Students whose Korean score is over 80 and whose math score is also over 80
result = data.query('국어 > 80 and 수학> 80')
print(result)
print('-' * 40)
import MySQLdb as my from sqlalchemy import create_engine as ce from pandas import DataFrame as df ef1 = pd.read_csv('/home/ai21/Desktop/common/Python_Exercises/emp.csv', header=None, names='name empno desig salary deptcode'.split()) ef1['deptcode'] = [121, 122, 123, 121, 121, 123, 122, 121, 121, 124] print ef1 ce1 = ce("mysql://*****:*****@127.0.0.1/ai") #ef1.to_sql("ai_21_emp", ce1) dic1={ 'dept_no':[121,122,123,124],\ 'dept_name':['CSE','ECE','MECH','IT'],\ 'dept_location':['B2F0','B3F2','B1F1','B3F1'] } df1 = df(dic1, columns=['dept_no', 'dept_name', 'dept_location']) #df1.to_sql("ai_21_dep", ce1) con = my.connect('127.0.0.1', 'ai', 'ai', 'ai') ef2 = pd.read_sql("select * from ai_21_emp", con) df2 = pd.read_sql("select * from ai_21_dep", con) df3 = ef2.merge(df2, left_on='deptcode', right_on='dept_no')[['empno', 'name', 'dept_name', 'salary']].sort_values(['salary']) print df3 #df3.to_sql("ai_21_emp_dep", ce1)
def ret_all_courses(str_input): # initializes the file object with a text file that maps each career to a major f = open(os.path.join("", "app", "all_text_files", "list_careers.txt"), "r", encoding="utf-8") all_text = f.read() f.close() list_lines = all_text.split("\n") jobs = [] majors = [] # initializing a list of jobs and list of majors for each in list_lines: parts = each.split("*") if len(parts) == 2: job = parts[0].strip() jobs.append(job) major = parts[1].strip() majors.append(major) # checks if the string entered is present in job list if str_input in jobs: course = [] desc = [] list_desc = [] fuzzy = [] # finds the index for the given job index = jobs.index(str_input) # finds the corresponding major major = majors[index] # string manipulation for the file name if ":" in major: parts = major.split(":") major = parts[0] + parts[1] f = open(os.path.join("majors", major.lower() + ".html"), encoding="utf-8") h_text = f.read() f.close() # using a soup object to search for list of courses from the website of the major soup = BeautifulSoup(h_text, "html.parser") all_a = soup.find_all("a", {'class': "bubblelink code"}) for each in all_a: course_text = clean_up_text(each.get_text()).replace("\u200b", "") desc_text = get_desc_text(course_text) if desc_text is None: print(course_text, "None!") continue if course_text[-3:].isnumeric(): if course_text[:2] == "or": course.append(course_text[2:-3] + " " + course_text[-3:]) else: course.append(course_text[:-3] + " " + course_text[-3:]) desc.append(desc_text) # using fuzzy to assign the ratio of the match which is used to sort the amount overlap fuzzy.append(fuzz.token_sort_ratio(str_input, desc_text)) # finding a more detailed description in all_data.csv for i in range(len(desc)): flag = 1 for j in range(len(csv_data)): if cell(j, "Name").lower() in desc[i].lower(): flag = 0 list_desc.append(cell(j, "Description")) break if flag == 1: list_desc.append("no desc found") # constructing the dataframe dict_vals = {"course": course, "name": desc, "description": list_desc, "fuzz": fuzzy} df_courses = df(dict_vals) # sorting by the amount of match df_courses = df_courses.sort_values(by=["fuzz"], ascending=[False]) df_courses = df_courses.reset_index(drop=True) df_courses = df_courses.drop_duplicates(subset=["course"], keep="first") df_courses = df_courses.reset_index(drop=True) # returning those rows where the amount of match is greater than 30 return df_courses[df_courses["fuzz"] > 30] else: print("job not in jobs list")
# Count of patent: 927223 dict_subcls_list = json.load( open(WORK_DIR + '/data_json/subclass_list_ini.json')) print(dict_subcls_list.keys()) print_count_subcls_list(dict_subcls_list) tmp = data[data['subclass'].isin( dict_subcls_list['subclass_Svc.'])]['patent_no'].values pat_no_list = list(set(tmp)) data_pat_Svc = data[data['patent_no'].isin(pat_no_list)] data_pat_Svc.head() tmp = data_pat_Svc.groupby('subclass').count() print("Count of subclass: {}".format(len(tmp))) df_gby_cls = df({'subclass': tmp.index.values}) df_gby_cls['count_pat'] = tmp[tmp.columns[-1]].values df_gby_cls.head() df_gby_cls.describe() """ count_pat count 727.000000 mean 1052.097662 std 13667.482842 min 1.000000 25% 2.500000 50% 18.000000 75% 104.000000 max 357936.000000 """ df_gby_cls.plot.hist(bins=100, log=True)
def generateline(stocknumber, Type, startdate, enddate, interval): startdata = startdate.encode("ascii").replace("/", "-").replace( "\n", "") #convert to tushare readable date enddata = enddate.encode("ascii").replace("/", "-").replace("\n", "") array = df() #print startdata #print enddata current_time = time.strftime("%Y/%m/%d") if Type == "分笔".decode("utf-8"): if startdate != current_time: array = ts.get_tick_data(stocknumber, date=startdata) #分笔 if array is None: return array = array.sort_values("time") date = array["time"].tolist() amount = array["amount"].tolist() atype = array["type"].tolist() price = array["price"].tolist() flag = ["bar" for i in date] for idx, val in enumerate(atype): #if卖盘,交易变成负数 if val == "卖盘": amount[idx] = -amount[idx] if val == "中性盘": #if中性盘,则忽略. Might have a problem with this part?? amount[idx] = 0 returnarray = zip(date, amount, flag, price) return returnarray else: array = ts.get_today_ticks(stocknumber) #Tushare里今日分笔和历史分笔需要分别对待 if array is None: return array = array.sort_values("time") date = array["time"].tolist() amount = array["amount"].tolist() atype = array["type"].tolist() flag = ["bar" for i in date] for idx, val in enumerate(atype): if val == "卖盘".decode("utf-8"): amount[idx] = -amount[idx] if val == "中性盘".decode("utf-8"): amount[idx] = 0 returnarray = zip(date, amount, flag) return returnarray if interval != "qfq" and interval != "hfq": #正常历史k线 if Type != "Kline": array = ts.get_k_data(stocknumber, start=startdata, end=enddata, ktype=interval) if array is None: return Type1 = firstletter(Type).encode("ascii") target = array[Type1].tolist() date = array["date"].tolist() returnarray = zip(date, target) return returnarray else: array = ts.get_k_data(stocknumber, start=startdata, end=enddata, ktype=interval) if array is None: return Date = array["date"].tolist() Open = array["open"].tolist() Close = array["close"].tolist() High = array["high"].tolist() Low = array["low"].tolist() Candlestick = zip(*[Date, Open, Close, Low, High]) return Candlestick else: if Type != "Kline": # 复权 array = ts.get_h_data(stocknumber, start=startdata, end=enddata, autype=interval) if array is None: return Type1 = firstletter(Type).encode("ascii") array = array.sort_index() target = array[Type1].tolist() date = array.index.format() returnarray = zip(date, target) return returnarray else: array = ts.get_h_data(stocknumber, start=startdata, end=enddata, autype=interval) if array is None: return array = array.sort_index() Date = array.index.format() Open = array["open"].tolist() Close = array["close"].tolist() High = array["high"].tolist() Low = array["low"].tolist() Candlestick = zip(*[Date, Open, Close, Low, High]) return Candlestick
def process_features(my_df, type, params): try: if not os.path.isdir(params.cache_dir): os.makedirs(params.cache_dir) if type == "train": dict = hkl.load( open(str(os.path.join(params.cache_dir, "train_features.hkl")), "r")) return dict["X"], dict["y_white"], dict["y_black"] elif type == "test": dict = hkl.load( open(str(os.path.join(params.cache_dir, "test_features.hkl")), "r")) return dict["X"] except: X = df() #Global X["MaxMoveScore"] = my_df["MoveScores"].apply(np.max) X["MinMoveScore"] = my_df["MoveScores"].apply(np.min) X["RangeMoveScore"] = X["MaxMoveScore"] - X["MinMoveScore"] X["IQRMoveScore"] = my_df["MoveScores"].apply(safe_iqr) X["MedianMoveScore"] = my_df["MoveScores"].apply(np.median) X["STDMoveScore"] = my_df["MoveScores"].apply(np.std) X["GameLen"] = my_df["MoveScores"].apply(len) X["MeanMoveScore"] = my_df["MoveScores"].apply(np.mean) X["ModeMoveScore"] = my_df["MoveScores"].apply( mode, axis=0).apply(lambda x: x[0][0]) # X["ModeRound10MoveScore"] = my_df["MoveScores"].apply(lambda x:np.round(x,-1)).apply(mode,axis=0).apply(lambda x:x[0][0]) X["SumMoveScore"] = my_df["MoveScores"].apply(np.sum) X["BlunderCount"] = my_df["MoveScores"].apply(catch_blunders) X["Results"] = my_df["Results"] # White Scores X["WhiteMaxMoveScore"] = my_df["WhiteMoveScores"].apply(np.max) X["WhiteMinMoveScore"] = my_df["WhiteMoveScores"].apply(np.min) X["WhiteRangeMoveScore"] = X["WhiteMaxMoveScore"] - X[ "WhiteMinMoveScore"] X["WhiteIQRMoveScore"] = my_df["WhiteMoveScores"].apply(safe_iqr) X["WhiteMedianMoveScore"] = my_df["WhiteMoveScores"].apply(safe_median) X["WhiteSTDMoveScore"] = my_df["WhiteMoveScores"].apply(safe_std) X["WhiteMeanMoveScore"] = my_df["WhiteMoveScores"].apply(safe_mean) X["WhiteModeMoveScore"] = my_df["WhiteMoveScores"].apply( safe_mode).apply(lambda x: x[0][0]) X["WhiteSumMoveScore"] = my_df["WhiteMoveScores"].apply(np.sum) X["WhiteBlunderCount"] = my_df["WhiteMoveScores"].apply(catch_blunders) # Black Scores X["BlackMaxMoveScore"] = my_df["BlackMoveScores"].apply(np.max) X["BlackMinMoveScore"] = my_df["BlackMoveScores"].apply(np.min) X["BlackRangeMoveScore"] = X["BlackMaxMoveScore"] - X[ "BlackMinMoveScore"] X["BlackIQRMoveScore"] = my_df["BlackMoveScores"].apply(safe_iqr) X["BlackMedianMoveScore"] = my_df["BlackMoveScores"].apply(safe_median) X["BlackSTDMoveScore"] = my_df["BlackMoveScores"].apply(safe_std) X["BlackMeanMoveScore"] = my_df["BlackMoveScores"].apply(safe_mean) X["BlackModeMoveScore"] = my_df["BlackMoveScores"].apply( safe_mode).apply(lambda x: x[0][0]) X["BlackSumMoveScore"] = my_df["BlackMoveScores"].apply(np.sum) X["BlackBlunderCount"] = my_df["BlackMoveScores"].apply(catch_blunders) #White Advantage #X["WhiteAdvantageMaxMoveScore"] = my_df["MoveScores"].apply(lambda x:filter(lambda elem:elem >= 0,x)).apply(safe_max) #Useless X["WhiteAdvantageMinMoveScore"] = my_df[ "WhiteAdvantageMoveScores"].apply(safe_min) X["WhiteAdvantageRangeMoveScore"] = X["MaxMoveScore"] - X[ "WhiteAdvantageMinMoveScore"] X["WhiteAdvantageIQRMoveScore"] = my_df[ "WhiteAdvantageMoveScores"].apply(safe_iqr) X["WhiteAdvantageMedianMoveScore"] = my_df[ "WhiteAdvantageMoveScores"].apply(safe_median) X["WhiteAdvantageSTDMoveScore"] = my_df[ "WhiteAdvantageMoveScores"].apply(safe_std) X["WhiteAdvantageCount"] = my_df["WhiteAdvantageMoveScores"].apply(len) X["WhiteAdvantageMeanMoveScore"] = my_df[ "WhiteAdvantageMoveScores"].apply(safe_mean) X["WhiteAdvantageModeMoveScore"] = my_df[ "WhiteAdvantageMoveScores"].apply(safe_mode).apply( lambda x: x[0][0]) # 
X["WhiteAdvantageModeRound10MoveScore"] = my_df["MoveMoveScores"].apply(lambda x:filter(lambda elem:elem >= 0,x)).apply(lambda x:np.round(x,-1)).apply(safe_mode).apply(lambda x:x[0][0]) #Black Advantage X["BlackAdvantageMaxMoveScore"] = my_df[ "BlackAdvantageMoveScores"].apply(safe_max) #X["BlackAdvantageMinMoveScore"] = my_df["MoveMoveScores"].apply(lambda x:filter(lambda elem:elem < 0,x)).apply(safe_min) #Useless X["BlackAdvantageRangeMoveScore"] = X[ "BlackAdvantageMaxMoveScore"] - X["MinMoveScore"] X["BlackAdvantageIQRMoveScore"] = my_df[ "BlackAdvantageMoveScores"].apply(safe_iqr) X["BlackAdvantageMedianMoveScore"] = my_df[ "BlackAdvantageMoveScores"].apply(safe_median) X["BlackAdvantageSTDMoveScore"] = my_df[ "BlackAdvantageMoveScores"].apply(safe_std) X["BlackAdvantageCount"] = my_df["BlackAdvantageMoveScores"].apply(len) X["BlackAdvantageMeanMoveScore"] = my_df[ "BlackAdvantageMoveScores"].apply(safe_mean) X["BlackAdvantageModeMoveScore"] = my_df[ "BlackAdvantageMoveScores"].apply(safe_mode).apply( lambda x: x[0][0]) # X["BlackAdvantageModeRound10MoveScore"] = my_df["BlackAdvantageScores"].apply(lambda x:np.round(x,-1)).apply(safe_mode).apply(lambda x:x[0][0]) #Partitioning ## All moves X["AllMoveScoresPartitionLen"] = my_df["Partition0MoveScores"].apply( len) for pt in range(params.partitions): X["Partition%dMaxMoveScore" % pt] = my_df["Partition%dMoveScores" % pt].apply(safe_max) X["Partition%dMinMoveScore" % pt] = my_df["Partition%dMoveScores" % pt].apply(safe_min) X["Partition%dRangeMoveScore" % pt] = X["Partition%dMaxMoveScore" % pt] - X["Partition%dMinMoveScore" % pt] X["Partition%dIQRMoveScore" % pt] = my_df["Partition%dMoveScores" % pt].apply(safe_iqr) X["Partition%dMedianMoveScore" % pt] = my_df["Partition%dMoveScores" % pt].apply(safe_median) X["Partition%dSTDMoveScore" % pt] = my_df["Partition%dMoveScores" % pt].apply(safe_std) X["Partition%dMeanMoveScore" % pt] = my_df["Partition%dMoveScores" % pt].apply(safe_mean) X["Partition%dModeMoveScore" % pt] = my_df["Partition%dMoveScores" % pt].apply(safe_mode).apply(lambda x: x[0][0]) X["Partition%dSumMoveScore" % pt] = my_df["Partition%dMoveScores" % pt].apply(np.sum) X["Partition%dBlunderCount" % pt] = my_df["Partition%dMoveScores" % pt].apply(catch_blunders) ## White moves X["WhiteMoveScoresPartitionLen"] = my_df[ "Partition0WhiteMoveScores"].apply(len) for pt in range(params.partitions): X["Partition%dMaxWhiteMoveScore" % pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_max) X["Partition%dMinWhiteMoveScore" % pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_min) X["Partition%dRangeWhiteMoveScore" % pt] = X["Partition%dMaxWhiteMoveScore" % pt] - X["Partition%dMinWhiteMoveScore" % pt] X["Partition%dIQRWhiteMoveScore" % pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_iqr) X["Partition%dMedianWhiteMoveScore" % pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_median) X["Partition%dSTDWhiteMoveScore" % pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_std) X["Partition%dMeanWhiteMoveScore" % pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_mean) X["Partition%dModeWhiteMoveScore" % pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(safe_mode).apply(lambda x: x[0][0]) X["Partition%dSumWhiteMoveScore" % pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(np.sum) X["Partition%dWhiteBlunderCount" % pt] = my_df["Partition%dWhiteMoveScores" % pt].apply(catch_blunders) ## Black moves X["BlackMoveScoresPartitionLen"] = my_df[ "Partition0BlackMoveScores"].apply(len) for pt in 
range(params.partitions): X["Partition%dMaxBlackMoveScore" % pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_max) X["Partition%dMinBlackMoveScore" % pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_min) X["Partition%dRangeBlackMoveScore" % pt] = X["Partition%dMaxBlackMoveScore" % pt] - X["Partition%dMinBlackMoveScore" % pt] X["Partition%dIQRBlackMoveScore" % pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_iqr) X["Partition%dMedianBlackMoveScore" % pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_median) X["Partition%dSTDBlackMoveScore" % pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_std) X["Partition%dMeanBlackMoveScore" % pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_mean) X["Partition%dModeBlackMoveScore" % pt] = my_df["Partition%dBlackMoveScores" % pt].apply(safe_mode).apply(lambda x: x[0][0]) X["Partition%dSumBlackMoveScore" % pt] = my_df["Partition%dBlackMoveScores" % pt].apply(np.sum) X["Partition%dBlackBlunderCount" % pt] = my_df["Partition%dBlackMoveScores" % pt].apply(catch_blunders) ## WhiteAdvantage moves X["WhiteAdvantageMoveScoresPartitionLen"] = my_df[ "Partition0WhiteAdvantageMoveScores"].apply(len) for pt in range(params.partitions): X["Partition%dMaxWhiteAdvantageMoveScore" % pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply(safe_max) X["Partition%dMinWhiteAdvantageMoveScore" % pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply(safe_min) X["Partition%dRangeWhiteAdvantageMoveScore" % pt] = X["Partition%dMaxWhiteAdvantageMoveScore" % pt] - X["Partition%dMinWhiteAdvantageMoveScore" % pt] X["Partition%dIQRWhiteAdvantageMoveScore" % pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply(safe_iqr) X["Partition%dMedianWhiteAdvantageMoveScore" % pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply(safe_median) X["Partition%dSTDWhiteAdvantageMoveScore" % pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply(safe_std) X["Partition%dMeanWhiteAdvantageMoveScore" % pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply(safe_mean) X["Partition%dModeWhiteAdvantageMoveScore" % pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply(safe_mode).apply(lambda x: x[0][0]) X["Partition%dSumWhiteAdvantageMoveScore" % pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply( np.sum) X["Partition%dWhiteAdvantageBlunderCount" % pt] = my_df["Partition%dWhiteAdvantageMoveScores" % pt].apply(catch_blunders) ## BlackAdvantage moves X["BlackAdvantageMoveScoresPartitionLen"] = my_df[ "Partition0BlackAdvantageMoveScores"].apply(len) for pt in range(params.partitions): X["Partition%dMaxBlackAdvantageMoveScore" % pt] = my_df["Partition%dBlackAdvantageMoveScores" % pt].apply(safe_max) X["Partition%dMinBlackAdvantageMoveScore" % pt] = my_df["Partition%dBlackAdvantageMoveScores" % pt].apply(safe_min) X["Partition%dRangeBlackAdvantageMoveScore" % pt] = X["Partition%dMaxBlackAdvantageMoveScore" % pt] - X["Partition%dMinBlackAdvantageMoveScore" % pt] X["Partition%dIQRBlackAdvantageMoveScore" % pt] = my_df["Partition%dBlackAdvantageMoveScores" % pt].apply(safe_iqr) X["Partition%dMedianBlackAdvantageMoveScore" % pt] = my_df["Partition%dBlackAdvantageMoveScores" % pt].apply(safe_median) X["Partition%dSTDBlackAdvantageMoveScore" % pt] = my_df["Partition%dBlackAdvantageMoveScores" % pt].apply(safe_std) X["Partition%dMeanBlackAdvantageMoveScore" % pt] = my_df["Partition%dBlackAdvantageMoveScores" % pt].apply(safe_mean) X["Partition%dModeBlackAdvantageMoveScore" % pt] = 
my_df["Partition%dBlackAdvantageMoveScores" % pt].apply(safe_mode).apply(lambda x: x[0][0]) X["Partition%dSumBlackAdvantageMoveScore" % pt] = my_df["Partition%dBlackAdvantageMoveScores" % pt].apply( np.sum) X["Partition%dBlackAdvantageBlunderCount" % pt] = my_df["Partition%dBlackAdvantageMoveScores" % pt].apply(catch_blunders) if type == "train": y_white = my_df["WhiteEloScore"].apply(int) y_black = my_df["BlackEloScore"].apply(int) dict = {} dict["X"] = X dict["y_white"] = y_white dict["y_black"] = y_black hkl.dump( dict, open(str(os.path.join(params.cache_dir, "train_features.hkl")), "wb")) return X, y_white, y_black elif type == "test": dict = {} dict["X"] = X hkl.dump( dict, open(str(os.path.join(params.cache_dir, "test_features.hkl")), "wb")) return X
import pandas as pd
from pandas import DataFrame as df
from pandas import Series as s
import numpy as np

# Build a 4x4 integer array and mask every multiple of 3
int_ary = np.arange(1, 17).reshape(4, 4)
masked = np.ma.masked_array(int_ary, mask=int_ary % 3 == 0)
print(masked)
# Masked entries become missing values when wrapped in a DataFrame
masked = df(masked)
print(masked)
def orderbook(self):
    depth = self.session.get("%s/public/orderbook/%s" % (self.url, self.symbol), timeout=2).json()
    asks = np.array(df(depth["ask"])).astype(float)[:20]
    bids = np.array(df(depth["bid"])).astype(float)[:20]
    return {"ask": asks, "bid": bids}
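# Not from the original source: a small usage sketch for the orderbook() method
# above, assuming the exchange returns price/size pairs so that column 0 of the
# ask/bid arrays is the price. The client object and its attributes (session,
# url, symbol) are taken as given from the snippet.
def spread_and_mid(client):
    book = client.orderbook()
    best_ask = book["ask"][:, 0].min()  # lowest ask price
    best_bid = book["bid"][:, 0].max()  # highest bid price
    return best_ask - best_bid, (best_ask + best_bid) / 2.0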
def read_DGS(filename):
    """
    Read a DigSilent Power Factory .dgs file and return a dictionary with the data
    Args:
        filename: File name or path
    Returns:
        Dictionary of data where the keys are the object types and the values
        are the data of the objects of the key object type
    """
    ###############################################################################
    # Read the file
    ###############################################################################
    f = open(filename, errors='replace')
    lines = f.readlines()
    f.close()

    ###############################################################################
    # Process the data
    ###############################################################################
    data = dict()

    """
    Numpy types:
    'b'       boolean
    'i'       (signed) integer
    'u'       unsigned integer
    'f'       floating-point
    'c'       complex-floating point
    'O'       (Python) objects
    'S', 'a'  (byte-)string
    'U'       Unicode
    'V'       raw data (void)
    """

    """
    DGS types:
    a
    p
    i
    r
    """
    types_dict = dict()
    types_dict["a"] = "|S32"
    types_dict["p"] = "|S32"
    types_dict["i"] = "<i4"
    types_dict["r"] = "<f4"
    types_dict["d"] = "<f4"

    types_dict2 = dict()

    current_type = None
    data_types = None
    header = None

    Headers = dict()

    # parse the file lines
    for line in lines:
        if line.startswith("$$"):
            line = line[2:]
            chnks = line.split(";")
            current_type = chnks[0]
            data[current_type] = list()
            print(current_type)

            # analyze types
            data_types = list()
            header = list()
            for i in range(1, len(chnks)):
                token = chnks[i].split("(")
                name = token[0]
                tpe = token[1][:-1]
                data_types.append((name, types_dict[tpe[0]]))
                header.append(name)
            types_dict2[current_type] = data_types
            Headers[current_type] = header
        elif line.startswith("*"):
            pass
        elif line.startswith(" "):
            if current_type is not None:
                line = line.strip()
                chnks = line.split(";")
                chnks = ["0" if x == "" else x for x in chnks]
                data[current_type].append(array(tuple(chnks)))

    # format keys
    for key in data.keys():
        print("Converting " + str(key))
        table = array([tuple(x) for x in data[key]], dtype=types_dict2[key])
        table = array([list(x) for x in table], dtype=object)  # np.object is removed in recent numpy
        header = Headers[key]
        data[key] = df(data=table, columns=header)

    # positions dictionary
    obj_id = data['IntGrf']['pDataObj'].values
    x_vec = data['IntGrf']['rCenterX'].values
    y_vec = data['IntGrf']['rCenterY'].values
    pos_dict = dict()
    for i in range(len(obj_id)):
        pos_dict[obj_id[i]] = (x_vec[i], y_vec[i])

    return data, pos_dict
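# Not from the original source: a minimal usage sketch for read_DGS(), with a
# hypothetical file path. Each value in `data` is a pandas DataFrame keyed by
# the DigSilent object type, and pos_dict maps graphic object ids to (x, y).
if __name__ == '__main__':
    data, pos_dict = read_DGS("example_grid.dgs")  # hypothetical path
    for obj_type, table in data.items():
        print(obj_type, table.shape)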
def __init__(self, col_list="default", attr_list="default", categorize=None,
             data_opt='train', scaling='mean-std', holdout=0.3, random_seed=42,
             oversampler=None):
    self.col_list = col_list
    self.attr_list = attr_list
    if (col_list == 'default'):
        self.col_list = ['ADMISSIONS', 'ICUSTAYS', 'INPUTEVENTS_MV', 'PATIENTS']
    if (attr_list == 'default'):
        self.attr_list = {
            'ADMISSIONS': ['ADMISSION_TYPE', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME'],
            'ICUSTAYS': ['LOS'],
            'INPUTEVENTS_MV': ['PATIENTWEIGHT'],
            'PATIENTS': ['DOB', 'GENDER']
        }
    for y in ['ADMITTIME', 'DISCHTIME', 'DEATHTIME']:
        if y not in self.attr_list['ADMISSIONS']:
            self.attr_list['ADMISSIONS'].append(y)
    self.datasetX = None
    self.datasetY = None
    col_list = self.col_list

    conn = pymysql.connect(host='192.168.56.104', user='******', password='******',
                           db='mimiciiiv14', charset='utf8')
    curs = conn.cursor(pymysql.cursors.DictCursor)

    # e.g. SELECT LABEVENTS.SUBJECT_ID FROM LABEVENTS
    #        JOIN PATIENTS on LABEVENTS.SUBJECT_ID = PATIENTS.SUBJECT_ID
    #        JOIN ADMISSIONS on PATIENTS.SUBJECT_ID = ADMISSIONS.SUBJECT_ID
    sql_line = 'SELECT'
    for col in col_list:
        for attr in self.attr_list[col]:
            sql_line += ' ,' + col + '.' + attr
    sql_line += ' FROM ' + col_list[0]
    sql_line = sql_line[:7] + sql_line[8:]  # drop the leading comma after SELECT
    prev = col_list[0]
    for col in col_list[1:]:
        if col != 'PATIENTS':
            sql_line += ' JOIN {0} on {1}.SUBJECT_ID = {0}.SUBJECT_ID and {1}.HADM_ID = {0}.HADM_ID'.format(col, prev)
        else:
            sql_line += ' JOIN {0} on {1}.SUBJECT_ID = {0}.SUBJECT_ID'.format(col, prev)
        col_list[0] = col
    sql_line += ';'
    curs.execute(sql_line)
    result = curs.fetchall()
    print(df(result))

    # From here
    self.datasetX = df(result)
    self.datasetY = self.datasetX[['ADMITTIME', 'DISCHTIME', 'DEATHTIME']]
    self.datasetX = self.datasetX.drop(['ADMITTIME', 'DISCHTIME', 'DEATHTIME'], axis=1)
    self.datasetX = changeValue(self.datasetX)  # .to_numpy()
    for i in self.datasetX.columns:
        if self.datasetX[i].dtype == object:
            self.datasetX = pd.concat(
                [self.datasetX, pd.get_dummies(self.datasetX[i], prefix=i)], axis=1)
            del (self.datasetX[i])
    if ((type(categorize) is list) and categorize != None):
        for i in categorize:
            self.datasetX = pd.concat(
                [self.datasetX, pd.get_dummies(self.datasetX[i], prefix=i)], axis=1)
            del (self.datasetX[i])
    print(self.datasetX.shape)
    print(self.datasetX)
    self.datasetY = cal_days(self.datasetY)
    self.datasetY = self.datasetY.fillna(self.datasetY.mean())
    self.datasetY = self.datasetY.to_numpy()
    # Up to here needs to be reworked.

    X_train, X_test, y_train, y_test = train_test_split(
        self.datasetX, self.datasetY, test_size=holdout, random_state=random_seed)
    if (scaling == 'mean-std'):
        std_scaler = StandardScaler()
        X_train = std_scaler.fit_transform(X_train)
        X_test = std_scaler.transform(X_test)
    if (scaling == 'min-max'):
        scaler = MinMaxScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
    if (data_opt == 'train'):
        self.X = torch.from_numpy(X_train)
        self.y = torch.from_numpy(y_train)
        if (oversampler == 'Random'):
            ros = RandomOverSampler(random_state=random_seed)
            self.X, self.y = ros.fit_resample(self.X, self.y)
        if (oversampler == 'ADASYN'):
            self.X, self.y = ADASYN(random_state=random_seed).fit_resample(self.X, self.y)
        if (oversampler == 'SMOTE'):
            self.X, self.y = SMOTE(random_state=random_seed).fit_resample(self.X, self.y)
    else:
        self.X = torch.from_numpy(X_test)
        self.y = torch.from_numpy(y_test)
    self.length = self.X.shape[0]
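# Not from the original source: a hedged usage sketch, assuming the class whose
# __init__ appears above is named something like MimicDataset (hypothetical name)
# and subclasses torch.utils.data.Dataset with the usual __len__/__getitem__, so
# it can be wrapped in a DataLoader.
from torch.utils.data import DataLoader

train_ds = MimicDataset(data_opt='train', scaling='mean-std', oversampler='SMOTE')  # hypothetical class name
loader = DataLoader(train_ds, batch_size=64, shuffle=True)
for X_batch, y_batch in loader:
    pass  # feed batches to a model here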
def __init__(self):
    super(MyUi, self).__init__()
    self.ui = Ui_MainWindow()
    self.ui.setupUi(self)
    cwd = os.getcwd()
    cwd = str(cwd)
    if os.path.isfile(cwd + "/time"):
        with open("time", "r") as outfile:  # reads the saved timestamp
            history = cPickle.load(outfile)
        if (datetime.now() - history).total_seconds() < 43200:  # less than 12 hours elapsed
            print("Less than 12 hours. Loading previously saved Pickle...")
            #with open("time","w") as infile:  # update time
            #    cPickle.dump(datetime.now(), infile)
        else:
            print("More than 12 hours. Updating Pickle...")
            data = ts.get_industry_classified()
            with open("class", "w+") as outfile:
                cPickle.dump(data, outfile)
            now = datetime.now()
            with open("time", "w+") as outfile:  # update time
                cPickle.dump(now, outfile)
    else:
        print("No Pickle found!")  # if this is the first time using tuchart in this directory
        data = df()
        data = ts.get_industry_classified()
        with open('class', 'w+') as outfile:  # records pickle
            cPickle.dump(data, outfile)
        now = datetime.now()
        with open("time", "w+") as outfile:
            cPickle.dump(now, outfile)
    with open("class", "r") as infile:  # reads the cached industry classification
        series = cPickle.load(infile)
    #series = pd.read_json(cwd + "\\class.json")
    #series = ts.get_industry_classified()
    series = pd.DataFrame(series)

    curdate = time.strftime("%Y/%m/%d")  # gets current time to put into dateedit
    curdateQ = QDate.fromString(curdate, "yyyy/MM/dd")
    dateobj = datetime.strptime(curdate, "%Y/%m/%d")  # converts to datetime object
    past = dateobj - timedelta(days=7)  # minus a week for the start date
    pasttime = datetime.strftime(past, "%Y/%m/%d")
    pastQ = QDate.fromString(pasttime, "yyyy/MM/dd")  # convert to QDate so the widget accepts the value
    pastL = dateobj - timedelta(days=30)  # minus a month for the start date
    pasttimeL = datetime.strftime(pastL, "%Y/%m/%d")
    pastQL = QDate.fromString(pasttimeL, "yyyy/MM/dd")

    np_indexes = np.array([['sh', '上证指数', '大盘指数'],
                           ['sz', '深证成指', '大盘指数'],
                           ['hs300', '沪深300指数', '大盘指数'],
                           ['sz50', '上证50', '大盘指数'],
                           ['zxb', '中小板', '大盘指数'],
                           ['cyb', '创业板', '大盘指数']])
    indexes = df(data=np_indexes, index=range(5000, 5006), columns=["code", "name", "c_name"])
    series = indexes.append(series)
    list1_bfr = series["c_name"].tolist()  # get industry categories; redundant ones are filtered out below
    list1 = list(set(list1_bfr))
    list1.sort(key=list1_bfr.index)
    #w = database()
    #zsparent = QTreeWidgetItem(self.ui.treeWidget)
    #zsparent.setText(0,"股票指数")
    #zsnames =["上证指数-sh","深圳成指-sz","沪深300指数-hs300","上证50-"]
    self.init_treeWidget(list1, series)

    self.ui.treeWidget.setContextMenuPolicy(Qt.CustomContextMenu)
    self.ui.treeWidget.customContextMenuRequested.connect(self.openMenu)
    #self.ui.webView.setGeometry(QtCore.QRect(0, 30, 1550, 861))
    file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "render.html"))  # path to the html file
    local_url = QUrl.fromLocalFile(file_path)
    self.ui.webView.load(local_url)
    #self.ui.commandLinkButton.setFixedSize(50, 50)
    self.ui.search_btn.clicked.connect(lambda: self.search_comp(series))
    self.ui.log_btn.clicked.connect(lambda: self.log())
    self.ui.init_code_btn.clicked.connect(lambda: self.code_sort_tree(series))
    self.ui.init_category_btn.clicked.connect(lambda: self.init_treeWidget(list1, series))
    self.ui.commandLinkButton.clicked.connect(self.classify)  # when the arrow button is clicked, trigger events
    #self.ui.commandLinkButton.clicked.connect(lambda action: self.classify(action, self.ui.treewidget))

    # QSizePolicy
    try:
        retain_size = self.ui.dateEdit_2.sizePolicy()
        retain_size.setRetainSizeWhenHidden(True)
        self.ui.dateEdit_2.setSizePolicy(retain_size)
        retain_size = self.ui.comboBox.sizePolicy()
        retain_size.setRetainSizeWhenHidden(True)
        # self.ui.comboBox.setSizePolicy(retain_size)
        retain_size = self.ui.label_2.sizePolicy()
        retain_size.setRetainSizeWhenHidden(True)
        self.ui.label_2.setSizePolicy(retain_size)
    except AttributeError:
        print("No PyQt5 binding! Widgets might be deformed")

    self.ui.dateEdit.setDate(pastQL)
    self.ui.dateEdit_2.setDate(curdateQ)  # populate widgets
    self.ui.dateEdit.setCalendarPopup(True)
    self.ui.dateEdit_2.setCalendarPopup(True)
    self.ui.comboBox.addItems(["D"])
    self.ui.treeWidget_2.setDragDropMode(self.ui.treeWidget_2.InternalMove)
    self.ui.treeWidget_2.setContextMenuPolicy(Qt.CustomContextMenu)
    self.ui.treeWidget_2.customContextMenuRequested.connect(self.openWidgetMenu)
    #self.ui.toolbutton.clicked.connect(lambda action: self.graphmerge(action, CombineKeyword))
    self.ui.combobox.currentIndexChanged.connect(lambda: self.modifycombo(pastQL, pastQ))
if row.Type == 'Sell':
    last_cpl = row[4]  # 'Cumulative P/L' column
    if isnan(last_cpl):
        break
    rowdict['Position Id'] = int(posid)
    rowdict['Date/Time'] = row[3]  # 'Date/Time' column
    rowdict['Order Type'] = row.Type
    rowdict['Profit/Loss'] = trade_pl
    rowdict['Cumulative P/L'] = last_cpl
    rows.append(rowdict)

# create an empty equity curve DataFrame
#ecols = ['Position Id', 'Date/Time', 'Order Type',
#         'Profit/Loss', 'Cumulative P/L']
edf = df(rows)
#fig = px.line(edf, x='Date/Time', y='Cumulative P/L', title="Equity Curve")
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Candlestick(x=cdf['Date/Time'], open=cdf['Open'], high=cdf['High'],
                             low=cdf['Low'], close=cdf['Close']),
              secondary_y=False)
fig.add_trace(go.Scatter(x=edf['Date/Time'], y=edf['Cumulative P/L']),
              secondary_y=True)
#fig.update_xaxes(rangebreaks=[dict(bounds=["sat", "mon"])])
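# Not from the original source: the snippet stops after adding the traces; a likely
# next step (sketch only) is to label the two y-axes and render the combined chart.
fig.update_layout(title="Price with equity curve overlay",
                  xaxis_rangeslider_visible=False)
fig.update_yaxes(title_text="Price", secondary_y=False)
fig.update_yaxes(title_text="Cumulative P/L", secondary_y=True)
fig.show()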
import numpy as np
from pandas import read_csv
from pandas import DataFrame as df

from pnn import PNN

if __name__ == '__main__':
    np.random.seed(42)
    try:
        train_set = np.array(read_csv("data/input/KDDTrain_procsd_redcd.csv"))
        test_set = np.array(read_csv("data/input/KDDTest_procsd.csv"))
        n = int(train_set.shape[1] - 1)
        input("Press 'Enter' to start")
        train_set_in = train_set[:, 0:n]
        train_set_out = train_set[:, n]
        test_set_in = test_set[:, 0:n]
        test_set_out = test_set[:, n]
        pnn = PNN(train_set_in, train_set_out)
        print("\tRECOGNITION (testing set)")
        test_recog = pnn.recognize(test_set_in, test_set_out).squeeze()
        testDF = df({"expected": test_set_out, "recognized": test_recog})
        testDF.to_csv("data/output/KDD_testing_pnn.csv")
    except Exception as e:
        print("Exception occurred:\n{}".format(e))
    finally:
        input("Press 'Enter' to quit")
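# Not from the original source: a quick sketch for checking recognition accuracy
# from the CSV the script above writes out (same "expected"/"recognized" columns).
from pandas import read_csv

results = read_csv("data/output/KDD_testing_pnn.csv")
accuracy = (results["expected"] == results["recognized"]).mean()
print("PNN test accuracy: {:.2%}".format(accuracy))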
import requests
from pandas import DataFrame as df
import plotly.graph_objects as go

# Hit the API
r = requests.get('https://coronavirus-tracker-api.herokuapp.com/v2/locations')
# Take the locations list from the response
r = df(r.json()['locations'])

lon = []
lat = []
for x in r['coordinates']:
    lon.append(x['longitude'])
    lat.append(x['latitude'])
r['lat'] = df(lat)
r['lon'] = df(lon)

confirmed = []
confirmed_size = []
deaths = []
deaths_size = []
recovered = []
recovered_size = []
for x in r['latest']:
    confirmed.append(x['confirmed'])
    confirmed_size.append(int(x['confirmed']) / 700)
    deaths.append(x['deaths'])
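# Not from the original source: the loop above is truncated in the snippet; assuming
# it finishes populating the lists, one way (sketch only) the coordinates and
# confirmed-case counts might be drawn with the plotly import at the top.
fig = go.Figure(go.Scattergeo(lon=r['lon'], lat=r['lat'],
                              text=confirmed,
                              marker=dict(size=confirmed_size)))
fig.update_layout(title="Confirmed COVID-19 cases by location")
fig.show()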
def PrintData(x): print(df(x))