def reward_holders(TOKENS, EXCEPTIONS, MEMOS, POT, PUB=False, MULT=0, FIXED_NUM=0):
    print("rewarding holders...")
    # Split each token spec on ':' into its component parts
    COLLECTABLES = [t.split(":") for t in TOKENS]
    EXCEPT = list(EXCEPTIONS)
    # Pick a random memo to attach to the payments
    MEMO = MEMOS[rand(0, len(MEMOS) - 1)]
    holders_totals, percent_stakes = Functions.get_percent_holders(
        COLLECTABLES, EXCEPT, public=PUB)
    if not (MULT or FIXED_NUM):
        sendpot, receipt = Functions.send_payments(
            percent_stakes, POT, MEMO, public=PUB)
    elif MULT:
        sendpot, receipt = Functions.send_payments(
            holders_totals, POT, MEMO, public=PUB, multiplier=MULT)
    else:
        sendpot, receipt = Functions.send_payments(
            percent_stakes, POT, MEMO, public=PUB, fixed_amount=FIXED_NUM)
    Receipt_String = "\n\t Totals " + DF(holders_totals).to_string()
    Receipt_String += "\n\n\t Totals by % " + DF(percent_stakes).to_string()
    Receipt_String += "\n\n\t Sending ... " + DF(sendpot).to_string() + receipt
    # Log transaction data locally
    receipt_file = "Receipts/" + time.strftime("%Y%m%d%H%M") + ".txt"
    with open(receipt_file, 'w') as log:
        log.write(Receipt_String)
    return Receipt_String, receipt_file
def get_category_count(name, deal_now, train_data, start_date, end_date):
    # Count how many times each user interacted with each value of the category
    count = DF(deal_now.groupby(['user_id', name]).size()
               .reset_index().rename(columns={0: 'times'}))
    count_size = deal_now.groupby([name]).size().shape[0]
    sum_data = 0
    for i in range(0, count_size):
        new_name = 'see_' + name + '_' + str(i)
        temp = pd.merge(train_data, count[count[name] == i],
                        on=['user_id']).rename(columns={'times': new_name})
        train_have = pd.merge(train_data, temp[['user_id', new_name]],
                              on=['user_id'])
        train_have = train_have[['user_id', new_name]]
        # Users who never saw this category value get a count of 0
        not_have_name = list(set(train_data['user_id'].values) -
                             set(train_have['user_id'].values))
        train_not_have = DF()
        train_not_have['user_id'] = train_data[
            train_data['user_id'].isin(not_have_name)]['user_id']
        train_not_have[new_name] = 0
        temp = pd.concat([train_have, train_not_have], axis=0)
        train_data = pd.merge(train_data, temp, on=['user_id'], how='left')
        sum_data += train_data[new_name].values
    # Convert the raw counts into per-user ratios
    for i in range(0, count_size):
        new_name = 'see_' + name + '_' + str(i)
        train_data[new_name + '_ratio'] = train_data[new_name].values / sum_data
    return train_data
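# Hedged usage sketch for get_category_count (the toy frames are hypothetical;
# category values are assumed to be the integers 0..k-1, since the loop
# filters on count[name] == i, and start_date/end_date are unused by the body).
from pandas import DataFrame as DF
deal_now_demo = DF({'user_id': [1, 1, 2, 2, 2], 'page': [0, 0, 0, 1, 1]})
train_demo = DF({'user_id': [1, 2]})
out_demo = get_category_count('page', deal_now_demo, train_demo, None, None)
print(out_demo[['user_id', 'see_page_0', 'see_page_0_ratio']])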
def __init__(self, debug=0):
    self.data_url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    x = requests.get(self.data_url).json()
    data = json.loads(x['data'])
    if debug:
        self.data = data
    a = data['areaTree'][0]['children'][0]['today'].keys()
    b = ['total_' + i for i in data['areaTree'][0]['total'].keys()]
    self.__header = ['city'] + b + list(a)
    self.chinaDayList = DF([i.values() for i in data['chinaDayList']],
                           columns=data['chinaDayList'][0].keys())
    self.chinaDayADD = DF([i.values() for i in data['chinaDayAddList']],
                          columns=data['chinaDayAddList'][0].keys())
    print(f'As of {data["lastUpdateTime"]}, 2019-nCoV has spread to '
          f'{len(data["areaTree"])} countries/regions')
    print(f'China cumulative confirmed cases: {data["chinaTotal"]["confirm"]}, '
          f'new since 00:00 yesterday: {data["chinaAdd"]["confirm"]}')
    # Per-region data
    areaTree = data['areaTree']
    self.area_dict = {}  # records every region
    self.total_rec = [self._detail_area(i) for i in areaTree]  # parse all records
    self.all_area = set(self.area_dict.keys())  # all place names
    self.global_area = DF(self._country(areaTree),
                          columns=['country'] + self.__header[1:])  # global infections
    self.china = DF(self._country(areaTree[0]['children']),
                    columns=self.__header)  # infections within China
def permLabels(self, isSavePerms=None):
    # Note: the original referenced a bare `isBoolLabel`; it is assumed here
    # to be the instance attribute self.isBoolLabel.
    if self.isBoolLabel == 1:
        L = self.boolLabelsDF
    elif self.isBoolLabel == 0:
        L = self.contLabelsDF
    self.permedBoolLabelsDF = DF(index=L.index, columns=L.columns)
    for col in L.columns:
        # TODO - test what happens if the random permutation is applied to
        # each label separately
        self.permedBoolLabelsDF[col] = np.random.permutation(L[col])
    try:
        L = self.contLabelsDF
        self.permedContLabelsDF = DF(index=L.index, columns=L.columns)
        for col in L.columns:
            self.permedContLabelsDF[col] = np.random.permutation(L[col])
    except AttributeError:
        pass
    # Prompt only when the caller did not decide; the original checked
    # locals(), which never triggered because the argument had a default
    if isSavePerms is None:
        isSavePerms = int(input('save permed Labels? '))
    if isSavePerms:
        print('\nsaving to pickle...')
        pickle.dump(self, open(self.LabelsPath + '.pickle', 'wb'))
        print('saving to csv...')
        self.boolLabelsDF.to_csv(self.LabelsPath + 'DF.csv')
        print('All Labels Data successfully saved to ' + self.LabelsPath)
def main():
    # Clean up data from the previous analysis
    clean_up()
    # Fetch the exchange tickers and load them into a DataFrame per exchange
    exchange_data = get_exchange_data(my_exchange_list)
    exchange_data_CB = DF(exchange_data['tickers'][0])
    exchange_data_KC = DF(exchange_data['tickers'][1])
    # Clean the data so that it can be worked with more easily
    clean_KC_data = clean_exchange_data(exchange_data_KC)
    clean_CB_data = clean_exchange_data(exchange_data_CB)
    # Separate the information into trading pairs
    trading_pairs_KC = trading_pair_df(clean_KC_data)
    trading_pairs_CB = trading_pair_df(clean_CB_data)
    # Create a CSV file per exchange and send its data off for analysis
    records = int(input("How Many Records Would you like> "))
    days = input("How Many Days (ie. 1, 7, 14, 30)> ")
    trading_pairs = [trading_pairs_CB, trading_pairs_KC]
    for x in range(len(trading_pairs)):
        ohlc_to_csv(trading_pairs[x], my_exchange_list[x], records, days)
def __init__(self, DetailsDF, LabelsPath):
    self.SubjectsDetails = DetailsDF
    self.SubjectsList = list(DetailsDF.index.unique())
    self.boolLabelsDF = DF(index=self.SubjectsList)
    self.contLabelsDF = DF(index=self.SubjectsList)
    self.LabelingDetails = {}
    self.LabelingMethod = None
    self.isSave = int(input('save Labels? '))
    self.LabelsPath = LabelsPath
def PredictSales(pdate=dt.date.today()):
    # Get all salesmen details in the system
    # tsm.trainandsavemodel()
    data = pd.read_excel("Salesmandata.xlsx")
    productdata = pd.read_excel("Productdata.xlsx")
    productdata = productdata.values
    print("Weekday :", calendar.day_name[pdate.weekday()])
    # Derive the date features used by the model
    day = calendar.day_name[pdate.weekday()]
    year = pdate.year
    month = pdate.month
    weekofYear = pdate.isocalendar()[1]
    print(weekofYear)
    # Select the salesmen scheduled for this day
    salesmandata = data[data['Day'] == day]
    print(data['Day'])
    print(salesmandata)
    # Iterate over rows, not column labels: the original looped over the
    # DataFrame directly, which yields column names, not salesman records
    salesmandata = salesmandata.values
    fileName = 'PredictedSales_' + '.xlsx'
    print(fileName)
    wo = pd.ExcelWriter(fileName)
    for salesman in salesmandata:
        print("Salesman:" + str(salesman))
        predicteddetailsDF = DF()
        predicteddict = dict({'Salesman': [], 'Product': [], 'Sale': []})
        for product in productdata:
            predicteddict['Salesman'].append(salesman[2])
            predicteddict['Product'].append(product[1])
            print(product)
            predictsalelist = [
                product[0], year, month, weekofYear, salesman[1], product[2],
                salesman[0], salesman[3]
            ]
            # Build the feature vector and run the trained model on it
            predictsale = np.array([predictsalelist])
            predictedValue = ps.predictsale(predictsale)
            print("PredictedValue[0]", predictedValue[0])
            predicteddict['Sale'].append(predictedValue[0])
        predicteddetailsDF = pd.DataFrame(predicteddict)
        print("==================================")
        # One sheet per salesman, named after the salesman
        predicteddetailsDF.to_excel(wo, salesman[2])
        print(predicteddetailsDF)
    wo.save()
def extract_features(filepath, model='VGG16', write_to=None):
    """
    Reads an input image file, or directory containing images, and returns
    the resulting extracted features.

    Use write_to=<some_filepath> to save the features somewhere.
    """
    # Get the model
    m = named_model(model)

    # Collect the image filepaths
    filepath = filepath.replace('\\', '/')
    img_fps = []
    assert os.path.exists(filepath), \
        'Filepath does not exist: "{}"'.format(filepath)
    if os.path.isfile(filepath):
        ext = filepath.lower().rsplit('.', 1)[-1]
        assert ext in IMG_EXTS, \
            'Specified file "{}" is not in recognised image formats'.format(filepath)
        # list.append returns None; the original assigned its result back
        img_fps.append(filepath)
    elif os.path.isdir(filepath):
        for fn in os.listdir(filepath):
            ext = fn.rsplit('.', 1)[-1]
            if ext in IMG_EXTS:
                img_fps.append(os.path.join(filepath, fn))
    else:
        raise ValueError(
            'Filepath should be an image, or a directory containing images')

    # And the image filenames
    img_fns = [fp.replace('\\', '/').rsplit('/', 1)[-1] for fp in img_fps]

    # Run the extraction over each image
    features = [_extract(fp, m) for fp in img_fps]

    # Make into a DataFrame and add an ID column
    features_df = DF(features, dtype=object)
    id_col = DF(img_fns, dtype=str)
    features_df.insert(0, 'ID', id_col)
    # Only write out when a destination was given, as the docstring promises
    if write_to is not None:
        features_df.to_csv(write_to, index=False)
    return features_df
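# Hedged usage sketch: 'photos/' is a hypothetical directory of images; the
# named VGG16 model weights are fetched by the underlying framework on first
# use, so this is illustrative rather than definitive.
feats = extract_features('photos/', model='VGG16', write_to='features.csv')
print(feats.shape)  # one row per image; the first column is the filename ID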
def matrix_scatter_pcs(names=None, tks=True, **kwargs):
    import itertools
    from pandas import DataFrame as DF
    import pandas as pd

    # Orient every input so that rows are components and columns are samples
    to_plot = []
    for key, value in kwargs.items():
        if key != "tks":
            if value.shape[0] > value.shape[1]:
                to_plot.append((key, DF(value.T)))
            else:
                to_plot.append((key, DF(value)))
    numvar = to_plot[0][1].shape[0]
    s = range(numvar)
    if names is None:
        names = ["PC " + str(x + 1) for x in s]
    fgr, axs = plt.subplots(numvar, numvar)
    colors = ["blue", "red", "green"]
    for xidx, yidx in itertools.product(s, repeat=2):
        ax = axs[yidx, xidx]
        # Hide all ticks and labels
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        # Set up ticks only on one side for the "edge" subplots...
        if xidx == 0:
            ax.set_ylabel(names[yidx])
            ax.yaxis.set_ticks([])
            ax.yaxis.set_visible(True)
        if xidx == max(s):
            if tks:
                ax.yaxis.set_ticks_position('right')
                ax.yaxis.set_visible(True)
        if yidx == 0:
            ax.set_xlabel(names[xidx])
            ax.xaxis.set_label_position("top")
            ax.xaxis.set_visible(True)
            ax.xaxis.set_ticks([])
        if yidx == max(s):
            if tks:
                ax.xaxis.set_ticks_position('bottom')
                ax.xaxis.set_visible(True)
        # Scatter each named dataset in its own colour
        xs, ys = [], []
        for i, (name, plot) in enumerate(to_plot):
            x, y = plot.iloc[xidx, :], plot.iloc[yidx, :]
            xs.append(x)
            ys.append(y)
            ax.scatter(x, y, s=3, color=colors[i], label=name)
        # Fit the axis limits to the pooled data
        allxs = pd.concat(xs)
        allys = pd.concat(ys)
        ax.set_xlim([min(allxs), max(allxs)])
        ax.set_ylim([min(allys), max(allys)])
    return fgr, axs
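# Hedged usage sketch: random score matrices stand in for real principal
# components; each is (components x samples), matching the orientation check
# at the top of the function.
import numpy as np
import matplotlib.pyplot as plt
pcs_before = np.random.randn(3, 50)
pcs_after = np.random.randn(3, 50)
fig, axes = matrix_scatter_pcs(before=pcs_before, after=pcs_after)
plt.show()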
def _read_tns_json(data, discrete, instantaneous, merge):
    instantaneous = ('tf' not in data if instantaneous is None
                     else instantaneous)
    if instantaneous:
        return ITemporalNodeSetDF(DF(data), discrete=discrete,
                                  no_duplicates=merge)
    else:
        return TemporalNodeSetDF(DF(data), discrete=discrete,
                                 disjoint_intervals=merge)
def basis_corr(frame, algo='SparCC', **kwargs):
    '''
    Compute correlations between all columns of a counts frame.
    This is a wrapper around pysurvey.analysis.basis_correlations.main

    Parameters
    ----------
    frame : DataFrame
        2D table of counts. Columns are components, rows are samples.
    algo : str {SparCC (default)| clr| pearson| spearman| kendall}
        The algorithm to use for computing correlation.

    Returns
    -------
    cor_med : frame
        Estimated correlation matrix.
        Labels are column labels of input frame.
    cov_med : frame/None
        If method in {SparCC, clr} : Estimated covariance matrix.
        Labels are column labels of input frame.
        Otherwise: None.

    ======= ============ ========= ================================================
    kwarg   Accepts      Default   Description
    ======= ============ ========= ================================================
    iter    int          20        number of estimation iterations to average over.
    oprint  bool         True      print iteration progress?
    th      0<th<1       0.1       exclusion threshold for SparCC.
    xiter   int          10        number of exclusion iterations for SparCC.
    norm    str          dirichlet method used to normalize the counts to fractions.
    log     bool         True      log-transform fractions? used if method != SparCC/CLR
    ======= ============ ========= ================================================
    '''
    import SparCC
    comps = frame.columns
    cor_med, cov_med, pval = SparCC.main(frame, algo=algo, **kwargs)
    cor = DF(cor_med, index=comps, columns=comps)
    if cov_med is None:
        cov = None
    else:
        cov = DF(cov_med, index=comps, columns=comps)
    return cor, cov, pval
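# Hedged usage sketch: a small random counts table; the component labels on
# the columns carry through to the returned correlation frame. The kwarg
# values follow the table above and are illustrative only.
import numpy as np
from pandas import DataFrame as DF
counts = DF(np.random.randint(1, 100, size=(20, 4)),
            columns=['otu1', 'otu2', 'otu3', 'otu4'])
cor, cov, pval = basis_corr(counts, algo='SparCC', iter=5)
print(cor.round(2))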
def column_types_table(data):
    print('Number of each type of columns:')
    count_dtype = DF(data.dtypes.value_counts()).reset_index()
    count_dtype.columns = ['name', 'total']
    print(count_dtype)
    print('\nNumber of unique classes in each column:')
    for i in count_dtype['name'].values:
        print('Type: ', i)
        print(DF(data.select_dtypes(i).apply(pd.Series.nunique, axis=0))
              .sort_values(by=[0], ascending=False)
              .rename(columns={0: 'NUNIQUE'}))
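# Hedged usage sketch on a small hypothetical mixed-dtype frame.
import pandas as pd
from pandas import DataFrame as DF
demo = DF({'a': [1, 2, 3], 'b': [1.0, 2.0, 3.0], 'c': ['x', 'y', 'y']})
column_types_table(demo)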
def label_analysis(data, label_name=None, feature_name=None):
    print('LABEL CATEGORY Analysis')
    count_label = DF(data[label_name].value_counts()).reset_index()
    count_label.columns = ['cate', 'total']
    print(count_label)
    try:
        data[label_name].astype(int).plot.hist()
        plt.show()
    except (ValueError, TypeError):
        # Fall back when the label contains NaNs that cannot be cast to int
        data[label_name].fillna(-1).astype(int).plot.hist()
        plt.show()
    # Default to every column except the label (the original used a mutable
    # default argument, feature_name=[], which is avoided here)
    if not feature_name:
        feature_name = [i for i in data.columns if i != label_name]
    print('Want To Watch: ', len(feature_name))
    print(feature_name)
    print('Describe in each column: ')
    for i in count_label['cate'].values:
        print('Cate: ', i)
        print(data[data[label_name].astype(int) == i][feature_name].describe())
    print('CALC CORR')
    correlations = data.corr()[label_name].sort_values()
    print('Most Positive Correlations:\n', correlations.tail(15))
    print('\nMost Negative Correlations:\n', correlations.head(15))
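# Hedged usage sketch: a toy binary-label frame; with feature_name omitted,
# every non-label column is described per label category.
from pandas import DataFrame as DF
demo = DF({'target': [0, 1, 0, 1], 'f1': [1.0, 4.0, 2.0, 8.0]})
label_analysis(demo, label_name='target')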
def saveNoticeToCSV(self):
    result = DF(self.data)
    result.columns = ['title', 'url']
    # File name means "School_Department_Notices.csv"
    file = '학교_학과_공지사항.csv'
    if path.isfile(file):
        remove(file)
    result.to_csv(file, encoding='cp949')
def get_pic(model, feature_name):
    ans = DF()
    ans['name'] = feature_name
    ans['score'] = model.feature_importances_
    return ans.sort_values(by=['score'],
                           ascending=False).reset_index(drop=True)
def CollectCompanies(br):
    companyList = []
    for i in tqdm(range(1, 10)):
        url = f"http://www.annualreports.com/Companies?exch={i}"
        log.info(url)
        br.get(url)
        try:
            tbody = WebDriverWait(br, 30).until(
                EC.presence_of_element_located((By.XPATH, "//table/tbody")))
        except Exception as e:
            # Skip this exchange page if the table never appears; the
            # original fell through and used an undefined `tbody`
            log.info(f"{url} ==> {e}")
            continue
        for tr in tbody.find_elements_by_xpath(".//tr"):
            row = {}
            td = tr.find_elements_by_xpath(".//td")
            row['CompanyNameAr'] = td[0].text
            row['UrlAr'] = td[0].find_element_by_xpath(".//a").get_attribute('href')
            companyList.append(row)
        if debug:
            break
    return DF(companyList)
def get_division_feature(data, feature_name):
    # Build pairwise arithmetic-transform features (/, *, +, -) for every
    # pair of columns, recording the new feature names so they are not lost
    new_feature = []
    new_feature_name = []
    cols = data[feature_name].columns
    for i in range(len(cols) - 1):
        for j in range(i + 1, len(cols)):
            for op in ('/', '*', '+', '-'):
                new_feature_name.append(cols[i] + op + cols[j])
            new_feature.append(data[cols[i]] / data[cols[j]])
            new_feature.append(data[cols[i]] * data[cols[j]])
            new_feature.append(data[cols[i]] + data[cols[j]])
            new_feature.append(data[cols[i]] - data[cols[j]])
    temp_data = DF(pd.concat(new_feature, axis=1))
    temp_data.columns = new_feature_name
    data = pd.concat([data, temp_data], axis=1).reset_index(drop=True)
    print(data.shape)
    return data.reset_index(drop=True)
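# Hedged usage sketch: two numeric columns yield four derived columns whose
# names record the operation that produced them.
import pandas as pd
from pandas import DataFrame as DF
demo = DF({'a': [1.0, 2.0], 'b': [3.0, 4.0]})
out = get_division_feature(demo, ['a', 'b'])
print(out.columns.tolist())  # ['a', 'b', 'a/b', 'a*b', 'a+b', 'a-b']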
def read_results_text_file(results_text_file, labels, instrument_type, **kwargs):
    labels = pd.read_csv(labels, index_col=0, header=0)
    ret_DF = DF()
    # Dispatch on the plate-reader format
    if instrument_type == "BioTek1":
        data_split, Time_points = read_results_biotek1(results_text_file)
    elif instrument_type == "BioTek2":
        data_split, Time_points = read_results_biotek2(results_text_file)
    elif instrument_type == "BioAnalyzer":
        data_split, Time_points = read_results_bioanalyzer(results_text_file)
    for n, zz in enumerate(data_split):
        if zz[0] in labels.index:
            ret_DF.at[n, "Well"] = zz[0]
            ret_DF.at[n, "Drug"] = labels.loc[zz[0]].Drug
            ret_DF.at[n, "Concentration"] = labels.loc[zz[0]].Concentration
            ret_DF.at[n, "Incubation_time"] = labels.loc[zz[0]].Incubation_time
            ret_DF.at[n, "Sample_type"] = labels.loc[zz[0]].Type
            ret_DF.at[n, "Experiment"] = labels.loc[zz[0]].Experiment
            ret_DF.at[n, "Time_points"] = ";".join(Time_points)
            ret_DF.at[n, "OD"] = ";".join(blank_zero_hour(zz[1:]))
        else:
            print("%s well is not labelled correctly and will be skipped" % zz[0])
    # Optionally restrict the output to a single experiment
    if kwargs.get("seg") is True:
        ret_DF = ret_DF.loc[ret_DF["Experiment"].isin([kwargs["exp"]])]
    return ret_DF
def permute_w_replacement(frame, axis=0):
    '''
    Permute the frame values across the given axis.
    Creates a simulated dataset where the counts of each component (column)
    in each sample (row) are randomly sampled from all the counts of that
    component in all samples.

    Parameters
    ----------
    frame : DataFrame
        Frame to permute.
    axis : {0, 1}
        - 0 - Permute row values across columns
        - 1 - Permute column values across rows

    Returns
    -------
    Permuted DataFrame (new instance).
    '''
    from numpy.random import randint
    axis = 1 - _get_axis(axis)
    s = frame.shape[axis]
    # Draw s indices with replacement and take the corresponding values
    fun = lambda x: x.values[randint(0, s, (1, s))][0]
    perm = DF(frame.apply(fun, axis=axis, result_type='broadcast'))
    return perm
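# Hedged usage sketch: resample each row's values with replacement. The toy
# frame is hypothetical, and _get_axis is assumed from the surrounding module.
from pandas import DataFrame as DF
counts = DF([[1, 2, 3], [4, 5, 6]], columns=['a', 'b', 'c'])
print(permute_w_replacement(counts, axis=0))  # same shape, resampled values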
def generate(path) -> DF:
    files = os.listdir(path)
    dfs = []
    for f in files:
        if re.findall(r'\.json$', f):
            tab_col_name = filename_to_colname(f)
            # Join with the directory; the original opened the bare filename,
            # which fails unless path is the working directory
            with open(os.path.join(path, f), 'r') as fp:
                data = json.load(fp)
            metric = [round(d[2], 4) for d in data]
            step = [d[1] for d in data]
            df = DF(data=metric, index=step, columns=[tab_col_name])
            dfs.append(df)
    first = DF(dfs[0])
    newdf = first.join(dfs[1:], how='left')
    return newdf
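# Hedged usage sketch: 'runs/json/' is a hypothetical directory of
# TensorBoard-style JSON exports, where each record is
# [wall_time, step, value]; filename_to_colname is assumed from the
# surrounding module.
merged = generate('runs/json/')
print(merged.head())  # one column per file, indexed by step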
def get_label(start_date, end_date):
    merge_name = ['user_id', 'day']
    all_log = pd.concat(
        [action_log[merge_name], app_log[merge_name], video_log[merge_name]],
        axis=0)
    # Users active in the label window are the positives
    train_label = get_transform(all_log, start_date, end_date)
    train_1 = DF(list(set(train_label['user_id']))).rename(columns={0: 'user_id'})
    train_1['label'] = 1
    # Restrict to users registered before the window; everyone else registered
    # in that period but inactive in the window is a negative
    reg_temp = get_transform(register_log, 1, start_date - 1)
    train_1 = train_1[train_1['user_id'].isin(reg_temp['user_id'])]
    train_0 = DF(list(set(reg_temp['user_id']) -
                      set(train_1['user_id']))).rename(columns={0: 'user_id'})
    train_0['label'] = 0
    del train_label
    gc.collect()
    return pd.concat([train_1, train_0], axis=0)
def df(self):
    """Convert sampler.chain into a pandas.DataFrame for convenience."""
    _df = DF(self.sampler.flatchain)
    _df = _df.rename(columns={i: key for i, key in enumerate(self.keys)})
    _df["lnpost"] = self.sampler.flatlnprobability
    return _df
def write_mag_diff():
    print('Writing: ', OFILE)
    # Create data frames
    # Note: 7*43 = 301
    lst1, lst2 = np.arange(301), mag_diff_f6_f8(f606, f814, 301)
    arr1, arr2 = np.array_split(lst1, NCOLS), np.array_split(lst2, NCOLS)
    df1, df2 = DF(arr1).T, DF(arr2).T
    # Assign column names
    clm = ['Galaxy_%d' % i for i in range(NCOLS)]
    df1.columns, df2.columns = clm, clm
    # Combine respective columns and create a new df
    for i in range(7):
        df1.insert(i * 2 + 1, 'Diff_%d' % i, df2['Galaxy_%d' % i])
    # Print and write df
    df1.columns = ['Galaxy', 'Diff'] * 7
    df1.to_csv(OFILE, float_format='%.3f', sep='\t', index=None)
def update(self):
    name = self.name
    period = self.period
    start = self.start
    data = DF(self.api.returnChartData(name, period, start, time()))
    out = {}
    for col in data.keys():
        out[col] = np.array(data[col]).astype('double')
    return out
def parse_soup(self):
    '''Insert the parsed values into a fresh dataframe, and cast the
    measured values to float to make calculations easier.'''
    self.df = DF()
    self.df.loc[:, 'start_date'] = [d['startDate'] for d in self.soup_obj]
    self.df.loc[:, 'end_date'] = [d['endDate'] for d in self.soup_obj]
    self.df.loc[:, self.measure_unit] = [d['value'] for d in self.soup_obj]
    self.df.loc[:, 'source'] = [d['sourceName'] for d in self.soup_obj]
    self.df.loc[:, self.measure_unit] = self.df[self.measure_unit].astype(float)
    return self.df
def _detail_area(self, area):
    city_row = self._city_row(area)
    name = city_row[0][0]
    # If the region can be subdivided further, recurse into its children
    sp = 'children' in area.keys()
    if sp:
        for city in area['children']:
            city_row = city_row + self._detail_area(city)
    df = DF(city_row, columns=self.__header)
    self.area_dict[name] = df
    return city_row
def to_dataframe(self, **kwargs):
    """
    Convert the system into a dataframe, via the
    :py:meth:`~System.serialize` method.

    Args:
        **kwargs: arguments to be passed to the
            :py:meth:`System.serialize` method.
    """
    from pandas import DataFrame as DF
    df = DF(self.serialize(**kwargs))
    validate_dataframe_representation(self, df)
    return df
def _dict2DF(self, d, noneval, dropna=False):
    df = DF(noneval, index=self.row_labels, columns=self.col_labels,
            dtype=object)
    for k, res in d.items():
        i, j = self._positions[k]
        # .loc avoids the chained assignment of the original df[j][i]
        df.loc[i, j] = res
    try:
        df = df.astype(float)
    except (ValueError, TypeError):
        pass
    if dropna:
        return df.dropna(axis=0, how='all').dropna(axis=1, how='all')
    else:
        return df
def plot_cm(cm, title='Confusion Matrix', display_labels=None, output_file=None):
    cmap = 'PuRd'
    cm = np.array(cm)
    cmpp.pretty_plot_confusion_matrix(
        DF(cm, index=display_labels, columns=display_labels),
        cmap=cmap,
        title=title,
        output_file=results_path + 'graphs/' + output_file)
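# Hedged usage sketch: a 2x2 confusion matrix; results_path and the cmpp
# pretty-plot module are assumed from the surrounding file, and 'cm.png' is
# a hypothetical output name.
plot_cm([[40, 2], [5, 53]], title='Binary Confusion Matrix',
        display_labels=['neg', 'pos'], output_file='cm.png')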