def print_full(x): """Print the entire Dataframe / Series.""" pd.set_option('display.max_columns', None) pd.set_option('display.max_rows', None) print(x) pd.reset_option('display.max_columns') pd.reset_option('display.max_rows')
def print_full(df):
    ''' print all rows of pd.DataFrame '''
    pd.set_option('display.max_rows', len(df))
    print(df)
    pd.reset_option('display.max_rows')
def print_full(x):
    ''' Helper function to print the *full* dataframe. '''
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
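# The set/reset pairs above leave the options changed if print() raises. Where very old
# pandas versions do not need to be supported, the same effect can be had with
# pd.option_context, which several later snippets in this collection also use.
# A minimal sketch of that variant:
import pandas as pd

def print_full(x):
    """Print the entire DataFrame / Series, restoring the display options afterwards."""
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        print(x)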
def dataset_bucket_analysis_by_field(field):
    # Set the dataset
    hotel_dataset_properties = {Constants.BUSINESS_TYPE_FIELD: 'fourcity_hotel'}
    Constants.update_properties(hotel_dataset_properties)

    records = ETLUtils.load_json_file(Constants.PROCESSED_RECORDS_FILE)
    print('Loaded %d records' % len(records))

    user_frequency_map = {}

    for record in records:
        user_id = record[field]
        if user_id not in user_frequency_map:
            user_frequency_map[user_id] = 0
        user_frequency_map[user_id] += 1

    print('There is a total of %d %ss' % (len(user_frequency_map), field))
    sorted_x = sorted(user_frequency_map.items(), key=operator.itemgetter(1), reverse=True)
    print(sorted_x[0])
    print(sorted_x[1])
    print(sorted_x[2])
    # print(user_frequency_map)

    # Number of reviews per user
    rda = ReviewsDatasetAnalyzer(records)
    users_summary = rda.summarize_reviews_by_field(field)
    print('Average number of reviews per %s: %f' % (field, float(rda.num_reviews) / rda.num_users))
    users_summary.plot(kind='line', rot=0)

    pandas.set_option('display.max_rows', len(users_summary))
    print(users_summary)
    pandas.reset_option('display.max_rows')
def genes_from_features_index(column_wildcard):
    corr_type = 'whole-gene from features'
    usage = 'Example Usage: top_genes_from_features/T1T3?n=10&min_exprs=100&sort_col=1'
    result_elements = list()
    (n, expression_threshold, sort_col, thr) = get_top_list_args(request.args)

    some_results = gene_from_features_results.filter(regex=column_wildcard)[
        gene_from_features_results['mean'] > expression_threshold]
    some_results = some_results.ix[some_results.filter(regex='_df$').min(axis=1) > 1]
    if not n:
        n = some_results.shape[0]
    selected_column = some_results.columns[np.abs(sort_col)]
    some_results = some_results.sort(selected_column, ascending=n > 0)
    n = abs(n)
    if thr:
        n = min(n, sum(some_results[selected_column] < thr))
    some_results = some_results.head(n=n)
    some_results.columns = [c.replace('_', ' ') for c in some_results.columns]
    selected_column = some_results.columns[sort_col]

    gene_set_for_search = '[' + ''.join(["{'gene':'" + c + "'}," for c in some_results.index]) + ']'

    scatterize_all_data = ';'.join(some_results.index)
    scatterize_all_link = ('<a href="../scatterize_list/genes?list=' + scatterize_all_data +
                           '">Scatterize these genes.</a>')
    scatterize_link_notes = ('This will link to a Scatterize page with all these genes along with '
                             'various behavioral and physiological measures of interest, including AT.')

    enrichr_all_data = ';'.join(some_results.index)
    enrichr_all_label = (selected_column.replace(' ', '_') + '_top_' + str(n) +
                         '_genes_over_' + str(expression_threshold) + '_reads')
    enrichr_all_link = ('<a href="../enrichr_list/' + enrichr_all_data + '?analysis_name=' +
                        enrichr_all_label + '">Enrichr these genes.</a>')
    enrichr_link_notes = 'This will link to Enrichr for gene enrichment analyses of the genes listed on this page.'

    export_all_data = ';'.join(some_results.index)
    export_all_label = (selected_column.replace(' ', '_') + '_top_' + str(n) +
                        '_genes_over_' + str(expression_threshold) + '_reads')
    export_all_link = ('<a href="../export_list/' + export_all_label + '.txt?list=' +
                       export_all_data + '">Export this list</a>')
    export_link_notes = 'This will return a .txt with the genes on this page.'

    some_results['Gene Name'] = ["<a href=\"/results/" + c + "\">" + c + "</a>" for c in some_results.index]
    cols = some_results.columns.values
    cols = list(cols[-1:]) + list(cols[:-1])
    some_results = some_results[cols]

    pd.set_option('display.max_colwidth', -1)
    gene_list = some_results.to_html(classes='table table-striped', escape=False, index=False)
    pd.reset_option('display.max_colwidth')
    gene_list_notes = 'The "Gene Name" links to more info on the gene.'

    result_elements.append({'title': 'Top Gene List (' + column_wildcard + ')',
                            'notes': gene_list_notes, 'content': Markup(gene_list)})
    result_elements.append({'title': 'Scatterize feature list',
                            'notes': scatterize_link_notes, 'content': Markup(scatterize_all_link)})
    result_elements.append({'title': 'Enrichr feature list',
                            'notes': enrichr_link_notes, 'content': Markup(enrichr_all_link)})
    result_elements.append({'title': 'Export feature list',
                            'notes': export_link_notes, 'content': Markup(export_all_link)})

    this_title = selected_column
    return render_template('top_list.html', **locals())
def print_flux_bounds(model):
    """ Prints flux bounds for all reactions. """
    info = []
    for r in model.reactions:
        info.append([r.id, r.lower_bound, r.upper_bound])
    df = DataFrame(info, columns=['id', 'lb', 'ub'])
    pd.set_option('display.max_rows', len(df))
    print(df)
    pd.reset_option('display.max_rows')
def __str__(self):
    """ Currently displays Notations as a very long pandas.Series """
    import pandas as pd
    pd.options.display.max_rows = 9999
    string = pd.Series(self.__dict__).__str__()
    pd.reset_option('max_rows')
    return string
def pd_display_size_set(max_columns=None, max_rows=None):
    if max_columns:
        pandas.set_option('display.max_columns', max_columns)
    else:
        pandas.reset_option('display.max_columns')
    if max_rows:
        pandas.set_option('display.max_rows', max_rows)
    else:
        pandas.reset_option('display.max_rows')
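# A hypothetical call sequence for the helper above (the small DataFrame is only a
# placeholder to make the sketch runnable; any frame would do):
import pandas

df = pandas.DataFrame({'a': range(5), 'b': range(5)})
pd_display_size_set(max_columns=50, max_rows=200)   # raise the display limits
print(df)
pd_display_size_set()                               # both arguments None -> back to defaults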
def test_repr_chop_threshold(self):
    df = DataFrame([[0.1, 0.5], [0.5, -0.1]])
    pd.reset_option("display.chop_threshold")  # default None
    self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1')

    with option_context("display.chop_threshold", 0.2):
        self.assertEqual(repr(df), ' 0 1\n0 0.0 0.5\n1 0.5 0.0')

    with option_context("display.chop_threshold", 0.6):
        self.assertEqual(repr(df), ' 0 1\n0 0 0\n1 0 0')

    with option_context("display.chop_threshold", None):
        self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1')
def test_isnull_for_inf_deprecated(self):
    # gh-17115
    s = Series(['a', np.inf, np.nan, 1.0])
    with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
        pd.set_option('mode.use_inf_as_null', True)
        r = s.isna()
        dr = s.dropna()
        pd.reset_option('mode.use_inf_as_null')

    e = Series([False, True, True, False])
    de = Series(['a', 1.0], index=[0, 3])
    tm.assert_series_equal(r, e)
    tm.assert_series_equal(dr, de)
def print_fullPandas(x):
    """print_fullPandas is a helper function for printing a full dataframe

    :param x: the pandas dataframe to be printed
    :type x: a pandas dataframe
    :returns: no return, simply prints the full dataframe and then resets the
        default pandas print values.
    """
    pd.set_option('display.max_rows', len(x))
    pd.set_option('display.max_columns', 200)
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
def full_print(df):
    '''
    Routine to fully print pandas.core.frame.DataFrame or
    pandas.core.series.Series objects.

    Inputs:
    1. df :: pandas.core.frame.DataFrame object containing tabular data of
       size-fractionated zooplankton biomass
    '''
    import pandas as pd
    assert type(df) == type(pd.DataFrame()) or type(df) == type(pd.Series()), \
        '\'df\' is not of either pandas.core.frame.DataFrame or pandas.core.series.Series types.'
    pd.set_option('display.max_rows', len(df))
    print df
    pd.reset_option('display.max_rows')
def test_info_wide(self):
    from pandas import set_option, reset_option

    io = StringIO()
    df = DataFrame(np.random.randn(5, 101))
    df.info(buf=io)

    io = StringIO()
    df.info(buf=io, max_cols=101)
    rs = io.getvalue()
    self.assertTrue(len(rs.splitlines()) > 100)
    xp = rs

    set_option('display.max_info_columns', 101)
    io = StringIO()
    df.info(buf=io)
    self.assertEqual(rs, xp)
    reset_option('display.max_info_columns')
def execute(cls, ctx, op: "DataFrameAggregate"): try: pd.set_option('mode.use_inf_as_na', op.use_inf_as_na) if op.stage == OperandStage.map: cls._execute_map(ctx, op) elif op.stage == OperandStage.combine: cls._execute_combine(ctx, op) elif op.stage == OperandStage.agg: cls._execute_agg(ctx, op) elif op.raw_func == 'size': xp = cp if op.gpu else np ctx[op.outputs[0].key] = xp.array(ctx[op.inputs[0].key].agg(op.raw_func, axis=op.axis)) \ .reshape(op.outputs[0].shape) else: ctx[op.outputs[0].key] = ctx[op.inputs[0].key].agg(op.raw_func, axis=op.axis) finally: pd.reset_option('mode.use_inf_as_na')
def print_full_dataframe(df, file_path=None):
    """
    :param df: pandas Dataframe to print
    :param file_path: path where to save the Dataframe printing (optional)
    :return: None, but prints and saves stuff to disk
    """
    pd.set_option('display.max_rows', len(df))
    print(df)
    if file_path is not None:
        orig_stdout = sys.stdout
        with open(file_path, 'w') as f:
            sys.stdout = f
            print(df)
        sys.stdout = orig_stdout
    pd.reset_option('display.max_rows')
def execute_2D_binning(vis: Vis): pd.reset_option("mode.chained_assignment") with pd.option_context("mode.chained_assignment", None): x_attr = vis.get_attr_by_channel("x")[0].attribute y_attr = vis.get_attr_by_channel("y")[0].attribute vis._vis_data["xBin"] = pd.cut(vis._vis_data[x_attr], bins=lux.config.heatmap_bin_size) vis._vis_data["yBin"] = pd.cut(vis._vis_data[y_attr], bins=lux.config.heatmap_bin_size) color_attr = vis.get_attr_by_channel("color") if len(color_attr) > 0: color_attr = color_attr[0] groups = vis._vis_data.groupby( ["xBin", "yBin"], history=False)[color_attr.attribute] if color_attr.data_type == "nominal": # Compute mode and count. Mode aggregates each cell by taking the majority vote for the category variable. In cases where there is ties across categories, pick the first item (.iat[0]) result = groups.agg([ ("count", "count"), (color_attr.attribute, lambda x: pd.Series.mode(x).iat[0]), ]).reset_index() elif color_attr.data_type == "quantitative" or color_attr.data_type == "temporal": # Compute the average of all values in the bin result = groups.agg([("count", "count"), (color_attr.attribute, "mean") ]).reset_index() result = result.dropna() else: groups = vis._vis_data.groupby(["xBin", "yBin"], history=False)[x_attr] result = groups.count().reset_index(name=x_attr) result = result.rename(columns={x_attr: "count"}) result = result[result["count"] != 0] # convert type to facilitate weighted correlation interestingess calculation result["xBinStart"] = result["xBin"].apply( lambda x: x.left).astype("float") result["xBinEnd"] = result["xBin"].apply(lambda x: x.right) result["yBinStart"] = result["yBin"].apply( lambda x: x.left).astype("float") result["yBinEnd"] = result["yBin"].apply(lambda x: x.right) vis._vis_data = result.drop(columns=["xBin", "yBin"])
def get_print_full(x):
    """Same as print_full, but returns string."""
    # Change the output to capture the string instead of sending it to the console
    old_stdout = sys.stdout
    sys.stdout = newstdout = StringIO()  # capture all print
    try:
        pd.set_option("display.max_rows", len(x))
        print(x)
        string = newstdout.getvalue()  # Get string output
    except:
        raise
    finally:
        sys.stdout = old_stdout
        pd.reset_option("display.max_rows")
    return string[:-1]
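# For comparison: DataFrame.to_string() and Series.to_string() already return the fully
# rendered text (all rows, regardless of display.max_rows), so the stdout redirection
# above can usually be avoided. A minimal alternative sketch, assuming x is a DataFrame
# or Series:
def get_print_full(x):
    """Return the full text rendering of x without touching display options."""
    return x.to_string()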
def print_full(data):
    """Prints a dataframe at full width. Useful for peeking at the data for debugging.

    Parameters
    ----------
    data : Pandas dataframe
        The experimental results

    Returns
    -------
    Nothing
    """
    pd.set_option('display.max_rows', len(data),
                  "display.max_columns", 500,
                  "display.width", 1000)
    print(data)
    pd.reset_option('display.max_rows')
def dfn(*x):
    pd.reset_option('display.max_columns')
    pd.reset_option('display.max_rows')
    leng = len(x)
    df_concat = []
    for i in range(leng):
        row = len(x[0])
        blank = [''] * row
        tabn = '{' + str(i + 1) + '}'
        blank = pd.DataFrame(blank, columns=[tabn])
        xx = pd.DataFrame(x[i])
        if i == 0:
            df_concat = pd.concat([xx, blank], axis=1)
        else:
            df_concat = pd.concat([df_concat, xx, blank], axis=1)
    df_concat.replace(np.nan, '', inplace=True)
    display(df_concat)
def test_info_wide(self):
    from pandas import set_option, reset_option

    io = StringIO()
    df = DataFrame(np.random.randn(5, 101))
    df.info(buf=io)

    io = StringIO()
    df.info(buf=io, max_cols=101)
    rs = io.getvalue()
    assert len(rs.splitlines()) > 100
    xp = rs

    set_option("display.max_info_columns", 101)
    io = StringIO()
    df.info(buf=io)
    assert rs == xp
    reset_option("display.max_info_columns")
def Hitting_league_leaders():
    con = sqlite3.connect("MLB_Stats.sqlite")  # connect to database
    print("Hitting leaders from around the league:\n")
    AVG = pd.read_sql_query("SELECT Name, AVG, Team FROM Batting_Stats WHERE AB >45 ORDER BY AVG DESC LIMIT 1", con)
    H = pd.read_sql_query("SELECT Name, H, Team FROM Batting_Stats ORDER BY H DESC LIMIT 1", con)
    Dbl = pd.read_sql_query('SELECT Name, "2B", Team FROM Batting_Stats ORDER BY "2B" DESC LIMIT 1', con)
    Trip = pd.read_sql_query('SELECT Name, "3B", Team FROM Batting_Stats ORDER BY "3B" DESC LIMIT 1', con)
    HR = pd.read_sql_query("SELECT Name, HR, Team FROM Batting_Stats ORDER BY HR DESC LIMIT 1", con)
    RBI = pd.read_sql_query("SELECT Name, RBI, Team FROM Batting_Stats ORDER BY RBI DESC LIMIT 1", con)

    pd.set_option('display.max_colwidth', 40)
    pd.options.display.float_format = '{:,.3f}'.format
    print("Average:\n", AVG.to_string(index=False), "\n")
    pd.reset_option('display.float_format')
    print("Hits:\n", H.to_string(index=False), "\n")
    print("Doubles:\n", Dbl.to_string(index=False), "\n")
    print("Triples:\n", Trip.to_string(index=False), "\n")
    print("Home Runs:\n", HR.to_string(index=False), "\n")
    print("Runs Batted In:\n", RBI.to_string(index=False), "\n")
def abundance(valueTable, numbins):
    queryCounts = valueTable['qseqid'].value_counts()
    binsize = int(math.ceil(float(queryCounts.max()) / numbins))
    print(queryCounts[queryCounts > 1200].index.values)
    abundanceDict = {}
    lastAmount = 0
    for i in range(1, (numbins + 1)):
        currentBin = binsize * i
        currentAmount = queryCounts[queryCounts <= currentBin].count()
        abundanceDict[currentBin] = currentAmount - lastAmount
        lastAmount = currentAmount
    x = pd.Series(abundanceDict).sort_index()
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
    return x
def print_full(df: pd.DataFrame, num_rows: int = 100) -> None:
    '''Print the first num_rows rows of dataframe in full

    Resets display options back to default after printing
    '''
    pd.set_option('display.max_rows', len(df))
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 2000)
    pd.set_option('display.float_format', '{:20,.2f}'.format)
    pd.set_option('display.max_colwidth', -1)
    display(df.iloc[0:num_rows])
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
    pd.reset_option('display.width')
    pd.reset_option('display.float_format')
    pd.reset_option('display.max_colwidth')
    return None
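# A side note on the -1 used for display.max_colwidth here and in a few other snippets:
# newer pandas releases (1.0 and later, to my knowledge) deprecate -1 and document None
# as the value that disables column-width truncation:
pd.set_option('display.max_colwidth', None)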
def value_iteration(a=0.8, b=0.1):
    R = gridWorld()
    discount = 0.99
    values = [[] for i in range(17)]
    actions = [[] for i in range(17)]
    for i in range(17):
        values[i] += [R[i]]
    for t in range(1, 40):
        terminate = True
        for i in range(17):
            if i == 16:
                values[i] += [0]
                actions[i] += [None]
                break
            maxreward = -1 * float('inf')
            bestaction = None
            for action in [up, down, left, right]:
                actionsum = 0
                for transition in get_new_stage_and_probs(i, action, a, b):
                    prob = transition[1]
                    new_state = transition[0]
                    reward = R[i]
                    actionsum += prob * (reward + discount * values[new_state][t - 1])
                if actionsum >= maxreward:
                    maxreward = actionsum
                    bestaction = action
            values[i] += [maxreward]
            if values[i][t] - values[i][t - 1] > 0.01:
                terminate = False
            actions[i] += [bestaction]
        if terminate:  # terminate check
            break
    values = np.array(values)
    actions = np.array(actions)
    import pandas as pd
    pd.set_option('display.max_columns', 28)
    pd.set_option('display.max_rows', 28)
    print pd.DataFrame(values)
    print pd.DataFrame(actions)
    pd.reset_option('display.max_columns')
def main():
    Options.load()
    if Options.files['metadatajson'] is None:
        qStr = 'select ' + Options.query['select'] + ' where ' + Options.query['where']
        data = VavDataReader.importVavData(server=Options.query['client'], query=qStr)
    else:
        with open(Options.files['metadatajson']) as data_file:
            data = json.load(data_file)
            data_file.close()
    for key in data:
        data[key] = rename_sensors(data[key])
    if Options.files['outputjson'] is not None:
        VavDataReader.dictToJson(data, Options.files['outputjson'])
    if Options.files['outputcsv'] is not None or Options.output['printtoscreen']:
        print "Preprocessing finished. Processing now."
        if Options.output['vav'] is None:
            processed = processdata(data, Options.query['client'])
        else:
            processed = processdata(data, Options.query['client'], Options.output['vav'])
        print "Done processing."
        if Options.output['printtoscreen']:
            pd.set_option('display.max_rows', len(processed))
            print processed
            pd.reset_option('display.max_rows')
        if Options.files['outputcsv'] is not None:
            processed.to_csv(Options.files['outputcsv'])
    elif Options.files['outputjson'] is None:
        sys.stderr.write("ERROR: No output specified.\n"
                         "In config file, at least one of the following should"
                         " be true:\n"
                         "- outputJSON is set to something other than None\n"
                         "- outputCSV is set to something other than None\n"
                         "- printToScreen is set to True.\n"
                         "Please modify the config file to satisfy at least "
                         "one of these.\n")
        sys.stderr.flush()
        sys.exit(1)
    print 'Done.'
def print_full(x):
    pd.set_option('display.max_rows', len(x))
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 2000)
    # pd.set_option('display.float_format', '{:20, .2f}'.format)
    pd.set_option('display.max_colwidth', -1)
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
    pd.reset_option('display.width')
    # pd.reset_option('display.float_format')
    pd.reset_option('display.max_colwidth')
def finish_log():
    global log
    pd.set_option('display.max_colwidth', 1000)
    add_line_log('INFO', error_message='Finishing logging session')
    if (log.error_level >= MINIMUM_ERROR_LEVEL_TO_SEND_EMAIL).any() and mc_email.IS_SENDING_EMAIL:
        date_yesterday = (dt.datetime.now() - dt.timedelta(days=1)).strftime('%Y-%m-%d')
        mc_email.send_email(
            body=log.to_html(),
            subject='Failure in meteo station : {}'.format(date_yesterday),
            list_figures=log.figure.iteritems())
        add_line_log('INFO', error_message='E-mail sent to: {}'.format(mc_email.RECIPIENTS_EMAIL))
    else:
        add_line_log('INFO', error_message='E-mail not sent')
    if (log.error_level == 'INFO').all():
        print('\n>> ALL THE LOG ISSUES ARE INFORMATIONAL - NO WARNING EMAIL SHOULD BE SENT')

    # tries to read config file from the Current Working Directory where meteocheck is invoked
    working_path = os.getcwd()
    log.to_csv(str(Path(working_path, FILENAME_SESSION_LOG)), sep='\t', index=False, header=False, mode='w')
    log.to_csv(str(Path(working_path, FILENAME_HISTORY_LOG)), sep='\t', index=False, header=False, mode='a')
    pd.reset_option('display.max_colwidth')
def full_preprocessing(self, normalize, missing_strat, process_strat, label_age,
                       label_gender, label_id, print_missing=True,
                       print_columns=False, poly_degree=2):
    self.preprocessing(label_id)
    if normalize:
        self.normalize_age(label_age, label_gender)

    # We print the distribution of missing values (8)
    if print_missing:
        print('printing missing')
        missing_val_columns = self.apply(pd.value_counts)[8:]
        pd.set_option('display.max_columns', missing_val_columns.shape[1])
        print(missing_val_columns)
        pd.reset_option('display.max_columns')

    if missing_strat == 'Binary':
        # We create the binary columns
        self.create_missing_data_col()
    if missing_strat in ['Replacement', 'Binary']:
        # We replace missing values in the ADOS answers (8) by 0
        self.replace(8, 0, inplace=True)
    if print_columns:
        print(self.columns)
    if process_strat in ['pca_comp']:
        self.create_components_feat()
    if process_strat in ['indicator', 'interaction_ind']:
        self.create_indicators_columns()
    if process_strat in ['poly', 'interaction_ind']:
        self.create_poly_columns(poly_degree)
    if print_columns:
        print(self.columns)
    self.drop_constant_columns()
def CompareXlsx(filename):
    # Program begin
    Gou = []
    for fid, fname in enumerate(filename):
        df = pd.read_excel(fname, sheetname='Sheet1', header=None, parse_cols=[8, 9])
        df.columns = ['ID', 'name']
        df.sort_values(by='ID', inplace=True)
        df.dropna(inplace=True)
        df.drop_duplicates(subset='ID', inplace=True)
        df.reset_index(drop=True, inplace=True)
        for idnum in range(len(df['ID']) - 1, -1, -1):
            if isinstance(df['ID'][idnum], int):
                break
            df.drop(idnum, inplace=True)
        excel = df.reset_index(drop=True)
        Gou.append(excel['ID'])
        del excel
        del df
    com1 = Gou[1][Gou[1].isin(Gou[0])]
    for comnum in range(2, len(filename)):
        com2 = Gou[comnum].isin(com1)
        if 0 == sum(com2):
            com1 = []
            print('Output:')
            print('No Same data')
            return
        com1 = com1[com2]
    del Gou
    com1.reset_index(drop=True, inplace=True)
    samenum = len(com1)
    print('Output:')
    print('SAME:')
    pd.set_option('display.max_rows', samenum)
    print(com1)
    pd.reset_option('display.max_rows')
    print('NUM:%d' % samenum)
    com1.to_csv('compare.csv', index=False)
def execute(cls, ctx, op):
    try:
        pd.set_option('mode.use_inf_as_na', op.use_inf_as_na)
        if op.stage == OperandStage.map:
            cls._execute_map(ctx, op)
        elif op.stage == OperandStage.combine:
            cls._execute_combine(ctx, op)
        else:
            input_data = ctx[op.inputs[0].key]
            value = getattr(op, 'value', None)
            if isinstance(op.value, (Base, Entity)):
                value = ctx[op.value.key]
            ctx[op.outputs[0].key] = input_data.fillna(
                value=value, method=op.method, axis=op.axis,
                limit=op.limit, downcast=op.downcast)
    finally:
        pd.reset_option('mode.use_inf_as_na')
def pprint_parts(self):
    """Pretty print the parts of the energy system to the console."""
    df_parts = self.list_parts()  # Get DataFrame with all parts
    A = self.calc_investment()
    A_funding = self.calc_investment(include_funding=True)

    pd.set_option('precision', 2)  # Set the number of decimal points
    pd.set_option('display.float_format', self.f_space)

    print('------------- List of parts -------------')
    print(df_parts.to_string())
    print('-----------------------------------------')
    print('Total investment costs: ', self.f_space(A))
    if A != A_funding:
        print('Investment after funding: ', self.f_space(A_funding))
    print('-----------------------------------------')

    pd.reset_option('precision')  # ...and reset the setting from above
    pd.reset_option('display.float_format')
    return df_parts
def print_results(results):
    """Print GRAFIMO results on terminal without storing them on the three
    files (TSV, HTML, GFF3)

    Parameters
    ----------
    results : pandas.DataFrame
        GRAFIMO results
    """
    if not isinstance(results, pd.DataFrame):
        errmsg: str = "\n\nERROR: the results must be stored in a pandas DataFrame"
        raise NoDataFrameException(errmsg)

    # little hack in pd df parameters to avoid the weird default
    # print of a DataFrame (cut the majority of lines)
    pd.set_option("display.max_rows", len(results))
    print()  # newline
    print(results)
    pd.reset_option("display.max_rows")
def write_to_html(self):
    pandas.set_option('display.max_colwidth', -1)
    header = '{!s}'.format(self.df.index.tolist()[0])
    df = self.df.reset_index(level=['Clf.', 'Set_Type', 'Eval.'])
    if '#Rep.' in df:
        df.drop('#Rep.', 1, inplace=True)
    df.drop('Eval.', 1, inplace=True)
    df.drop('Set_Size', 1, inplace=True)
    df.drop('Set_Type', 1, inplace=True)
    df.drop('f1', 1, inplace=True)
    df.drop('precision', 1, inplace=True)
    df.columns = [
        'Clf', '\\ac{DGA} Type', '\\ac{ACC}', '\\ac{TPR}', '\\ac{TNR}',
        '\\ac{FNR}', '\\ac{FPR}'
    ]
    fname = settings.ANALYSIS_FOLDER + '/eval_full.html'
    with open(fname, 'w') as f:
        f.write(df.to_html())
    pandas.reset_option('display.max_colwidth')
def generatestockfile(stockslist):
    # Fetch the data
    date1 = '2000-01-01'
    date2 = '2020-12-01'
    data = yf.download(stockslist, date1, date2)
    '''data['Adj Close'].plot()
    plt.show()'''
    # Print first 5 rows of the data
    # print(data.head(5))
    for company in stockslist:
        print(company)
        datas = yf.download(company, date1, date2)  # gets the data of the company between the given dates
        print("datas:\n", datas)
        pd.set_option('display.max_rows', len(datas))  # this is here to get all of the data shown in the text file
        # the following section is to put the data into a text file:
        with open('rawdata/' + company + '.stock.txt', 'w') as stocks:
            print(datas, file=stocks)
        pd.reset_option('display.max_rows')  # reset the length
def restructure_data():
    r"""
    Restructures data read from a csv file.

    Method creates a two-dimensional DataFrame containing the power
    coefficient curve or power curve of the requested wind turbine.

    Returns
    -------
    Tuple (pandas.DataFrame, float)
        Power curve or power coefficient curve (pandas.DataFrame) and
        nominal power (float). Power (coefficient) curve DataFrame contains
        power coefficient curve values (dimensionless) or power curve values
        in W with the corresponding wind speeds in m/s.
    """
    df = read_turbine_data(filename=filename)
    wpp_df = df[df.turbine_id == self.turbine_name]
    # if turbine not in data file
    if wpp_df.shape[0] == 0:
        pd.set_option('display.max_rows', len(df))
        logging.info('Possible types: \n{0}'.format(df.turbine_id))
        pd.reset_option('display.max_rows')
        sys.exit('Cannot find the wind converter type: {0}'.format(
            self.turbine_name))
    # if turbine in data file write power (coefficient) curve values
    # to 'data' array
    ncols = ['turbine_id', 'p_nom', 'source', 'modificationtimestamp']
    data = np.array([0, 0])
    for col in wpp_df.keys():
        if col not in ncols:
            if wpp_df[col].iloc[0] is not None and not np.isnan(
                    float(wpp_df[col].iloc[0])):
                data = np.vstack(
                    (data, np.array([float(col), float(wpp_df[col])])))
    data = np.delete(data, 0, 0)
    df = pd.DataFrame(data, columns=['wind_speed', 'values'])
    nominal_power = wpp_df['p_nom'].iloc[0]
    return df, nominal_power
def order_table(orders, name):
    asks = orders['asks']
    asks = pd.DataFrame(asks, columns=['price', 'amount'])
    asks.index = asks.index + 1
    asks = asks.sort_index()

    bids = orders['bids']
    bids = pd.DataFrame(bids, columns=['price', 'amount'])
    bids.index = bids.index + 1
    bids = bids.sort_index()

    print(name + ':')
    table = pd.concat([asks, bids], axis=1)
    # set display option
    pd.set_option('display.max_rows', len(table))
    print(table)
    # reset
    pd.reset_option('display.max_rows')
    print()
def write_clean_tokens_to_file(data, pos, multiword, punc_marks, stopwords, window):
    path_extras = get_path_extras(multiword, punc_marks, stopwords, window)
    pd.set_option('display.max_rows', len(data))

    df = pd.DataFrame([data])
    df.to_csv(PREP_TRAINING_DATA_PATH + path_extras + '.csv', mode='a',
              float_format='%.5f', na_rep="NAN!", header=False, index=False,
              line_terminator="")

    df = pd.DataFrame([pos])
    df.to_csv(POS_TRAINING_DATA_PATH + path_extras + '.csv', mode='a',
              float_format='%.5f', na_rep="NAN!", header=False, index=False,
              line_terminator="")

    pd.reset_option('display.max_rows')
def display(X, rows=None, where="inline", name="df"):
    if rows == 'all':
        rows = 2000
    elif type(rows) is int:
        rows *= 2
    else:
        rows = 10
    if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series) or (isinstance(X, np.ndarray) and X.ndim <= 2):
        X = pd.DataFrame(X)
        if where == "popup":
            filename = name + ".html"
            X.to_html(filename)
            webbrowser.open(filename, new=2)
        else:
            pd.set_option('display.max_rows', rows)
            ipd.display(X)
            pd.reset_option('display.max_rows')
    else:
        print(X)
def execute_2D_binning(vis: Vis):
    pd.reset_option('mode.chained_assignment')
    with pd.option_context('mode.chained_assignment', None):
        x_attr = vis.get_attr_by_channel("x")[0]
        y_attr = vis.get_attr_by_channel("y")[0]

        vis._vis_data.loc[:, "xBin"] = pd.cut(vis._vis_data[x_attr.attribute], bins=30)
        vis._vis_data.loc[:, "yBin"] = pd.cut(vis._vis_data[y_attr.attribute], bins=30)
        groups = vis._vis_data.groupby(['xBin', 'yBin'])[x_attr.attribute]
        result = groups.agg("count").reset_index()  # .agg in this line throws SettingWithCopyWarning
        result = result.rename(columns={x_attr.attribute: "z"})
        result = result[result["z"] != 0]

        # convert type to facilitate weighted correlation interestingness calculation
        result.loc[:, "xBinStart"] = result["xBin"].apply(lambda x: x.left).astype('float')
        result.loc[:, "xBinEnd"] = result["xBin"].apply(lambda x: x.right)
        result.loc[:, "yBinStart"] = result["yBin"].apply(lambda x: x.left).astype('float')
        result.loc[:, "yBinEnd"] = result["yBin"].apply(lambda x: x.right)

        vis._vis_data = result.drop(columns=["xBin", "yBin"])
def print_predictions_comparison(df, predictions, label_name, num_of_rows=10):
    """ Print predictions next to actual values

    :param df: Pandas DataFrame containing the data
    :type df: pandas.DataFrame
    :param predictions: Array holding the predictions
    :type predictions: array
    :param label_name: Target label
    :type label_name: str
    :param num_of_rows: Number of rows to display
    :type num_of_rows: int
    """
    pd.set_option('display.max_rows', num_of_rows)
    if len(df) != len(predictions):
        print "\n### Error: Length of values does not match\n"
        return
    print "\n\n### Compare predictions to actual: ###\n"
    df['predictions'] = predictions
    print df[["predictions", label_name]][0:num_of_rows]
    print "###########\n\n"
    pd.reset_option('display.max_rows')
def __append_dataframe(self, data: PandasDataFrame, max_line_width: int = 100000000, **kwargs: Any):
    """Append to CSV file using dataframe.

    :param data: the dataframe to write.
    :param max_line_width: max line width (PANDAS: display.width).
    :param kwargs: any other arguments that the selected writer may accept.
    """
    self.__logger.debug("Append a Dataframe to the CSV file.")
    kwargs["header"] = False if "header" not in kwargs.keys() else kwargs["header"]
    kwargs["index"] = False if "index" not in kwargs.keys() else kwargs["index"]
    try:
        with open(self.__path, 'a') as f:
            pd.set_option("display.width", max_line_width)
            pd.set_option("display.max_rows", data.shape[0])
            pd.set_option("display.max_columns", data.shape[1])
            data.to_csv(f, header=kwargs["header"], index=kwargs["index"])
            pd.reset_option("display.width")
            pd.reset_option("display.max_rows")
            pd.reset_option("display.max_columns")
    except ():
        self.__logger.error(__name__ + " - Can not append dataframe to file: \n" + self.__path)
        sys.exit()
def combine_results_stratified(var: str, outputs_dir: str, BCN: str,
                               duration: int, hydrology_IDs: list,
                               run_dur_dic: dict = None,
                               remove_ind_dur: bool = True) -> dict:
    '''Combines the excess rainfall *.csv files for each duration into a
    single dictionary for all durations. A small value of 0.0001 is added
    so the result is not printed in scientific notation.
    '''
    pd.reset_option('^display.', silent=True)
    assert var in ['Excess_Rainfall', 'Weights'], 'Cannot combine results'
    dic = {}
    df_lst = []
    for ID in hydrology_IDs:
        scen = '{0}_Dur{1}_Hydro{2}'.format(BCN, duration, ID)
        file = outputs_dir/'{}_{}.csv'.format(var, scen)
        df = pd.read_csv(file, index_col=0)
        if var == 'Excess_Rainfall':
            df_dic = df.to_dict()
            dates = list(df.index)
            ordin = df.index.name.title()
            events = {}
            for k, v in df_dic.items():
                if 'E' in k:
                    m = list(v.values())
                    m1 = [float(i) + 0.0001 if float(i) < 0.0001 and 0 < float(i)
                          else float(i) for i in m]
                    events[k] = m1
            key = 'H{0}'.format(str(ID).zfill(2))
            val = {'time_idx_ordinate': ordin,
                   'run_duration_days': run_dur_dic[str(duration)],
                   'time_idx': dates,
                   'pluvial_BC_units': 'inch/ts',
                   'BCName': {BCN: events}}
            dic[key] = val
        elif var == 'Weights':
            df_lst.append(df)
        if remove_ind_dur:
            os.remove(file)
    if var == 'Weights':
        all_dfs = pd.concat(df_lst)
        weights_dic = all_dfs.to_dict()
        dic = {'BCName': {BCN: weights_dic['Weight']}}
        # print('Total Weight:', all_dfs['Weight'].sum())
    return dic
def get_tweets(search_term, items=1000, incl_retweets=False):
    """Uses the tweepy api to get tweets

    Parameters
    ----------
    search_term (string)
        The hashtag that you want to search Twitter for
    items (int)
        The amount of tweets to return, if available. Max set to 1000
    incl_retweets (boolean)
        Whether or not to include retweets

    Returns
    -------
    Returns an object of tweets
    """
    pd.reset_option('^display.', silent=True)

    # Clean input
    search_term = search_term.strip()
    if len(search_term.split()) > 1:
        search_term = search_term.replace(' ', '+')
    if items > 1000:
        items = 1000
        print('Using max items of 1000.')
    if incl_retweets:
        q_string = '#' + search_term
    else:
        q_string = '#' + search_term + ' -filter:retweets'

    # pass the constructed query string so the retweet filter takes effect
    tweets = tweepy.Cursor(api.search, q=q_string, lang="en",
                           since=str(now.year) + '-01-01').items(items)
    return tweets
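# The '^display.' argument used by the last two snippets is treated as a regex: reset_option
# matches it against option names and restores every display.* option in one call, with
# silent=True suppressing warnings for options that complain when reset. My understanding is
# that the special pattern 'all' goes further and resets every registered option, e.g.:
import pandas as pd

pd.reset_option('all', silent=True)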
def CompareXlsx(filename):
    # Program begin
    Gou = []
    for fid, fname in enumerate(filename):
        df = pd.read_excel(fname, sheetname='Sheet1', header=None, parse_cols=[8, 9])
        df.columns = ['ID', 'name']
        df.sort_values(by='ID', inplace=True)
        df.dropna(inplace=True)
        df.drop_duplicates(subset='ID', inplace=True)
        df.reset_index(drop=True, inplace=True)
        for idnum in range(len(df['ID']) - 1, -1, -1):
            if isinstance(df['ID'][idnum], int):
                break
            df.drop(idnum, inplace=True)
        excel = df.reset_index(drop=True)
        Gou.append(excel['ID'])
        del excel
        del df
    com1 = Gou[1][Gou[1].isin(Gou[0])]
    for comnum in range(2, len(filename)):
        com2 = Gou[comnum].isin(com1)
        if 0 == sum(com2):
            com1 = []
            print('Output:')
            print('No Same data')
            return
        com1 = com1[com2]
    del Gou
    com1.reset_index(drop=True, inplace=True)
    samenum = len(com1)
    print('Output:')
    print('SAME:')
    pd.set_option('display.max_rows', samenum)
    print(com1)
    pd.reset_option('display.max_rows')
    print('NUM:%d' % samenum)
    com1.to_csv('compare.csv', index=False)
def get_cheapest_spot_instance(client):
    prices = get(client, 'describe_spot_price_history',
                 {  # 'InstanceTypes': instance_types,
                     'StartTime': datetime.now() - timedelta(days=1)
                 })
    # convert the list of lists into a list of prices
    prices = list(itertools.chain(*[pl['SpotPriceHistory'] for pl in prices]))
    # prices = list(prices)
    prices_df = pd.DataFrame.from_dict(prices)
    prices_df = prices_df[prices_df.ProductDescription == 'Linux/UNIX']
    grouped = prices_df.groupby(['AvailabilityZone', 'InstanceType', 'ProductDescription'])
    first = grouped.first()
    first.sort_values(by=["SpotPrice"], inplace=True)
    first.reset_index(inplace=True)
    pd.set_option('display.max_rows', len(first))
    print first
    pd.reset_option('display.max_rows')
    return first.to_dict(orient='records')
def main(market_plate=u'创业板', filter_ruihua=True):
    stocks = query_market_plate_stock(market_plate, filter_ruihua)
    plate_stocks = []
    for i in stocks:
        sdt = query_latest_trading(i.stock_number)
        if sdt.today_closing_price > 0:
            item = {'stock_number': i.stock_number,
                    'stock_name': i.stock_name,
                    'increase_rate': sdt.increase_rate,
                    'today_closing_price': sdt.today_closing_price}
            plate_stocks.append(item)

    plate_stocks = sorted(plate_stocks,
                          key=lambda stock: float(stock.get('increase_rate').replace('%', '')),
                          reverse=True)
    print market_plate
    print len(plate_stocks)
    print '---------------------------------------------------'
    frame = DataFrame(plate_stocks).set_index('stock_number').reindex(
        columns=['stock_name', 'today_closing_price', 'increase_rate'])
    pd.set_option('display.max_rows', len(plate_stocks))
    print frame
    pd.reset_option('display.max_rows')
def print_full_dataframe(df):
    pd.set_option('display.max_rows', len(df))
    pd.set_option('max_colwidth', 50)
    pd.set_option('display.width', 0)
    print(df)
    pd.reset_option('display.max_rows')
    pd.reset_option('max_colwidth')
    pd.reset_option('display.width')
def print_full(x): "Print a dataframe in full, i.e. without skipping rows and inserting some (...)." pd.set_option('display.max_rows', len(x)) print x pd.reset_option('display.max_rows')
drinks.assign(servings=drinks.beer + drinks.spirit + drinks.wine)

# limit which rows are read when reading in a file
pd.read_csv('drinks.csv', nrows=10)           # only read first 10 rows
pd.read_csv('drinks.csv', skiprows=[1, 2])    # skip the first two rows of data

# write a DataFrame out to a CSV
drinks.to_csv('drinks_updated.csv')                 # index is used as first column
drinks.to_csv('drinks_updated.csv', index=False)    # ignore index

# save a DataFrame to disk (aka 'pickle') and read it from disk (aka 'unpickle')
drinks.to_pickle('drinks_pickle')
pd.read_pickle('drinks_pickle')

# randomly sample a DataFrame
train = drinks.sample(frac=0.75, random_state=1)    # will contain 75% of the rows
test = drinks[~drinks.index.isin(train.index)]      # will contain the other 25%

# change the maximum number of rows and columns printed ('None' means unlimited)
pd.set_option('max_rows', None)     # default is 60 rows
pd.set_option('max_columns', None)  # default is 20 columns
print drinks

# reset options to defaults
pd.reset_option('max_rows')
pd.reset_option('max_columns')

# change the options temporarily (settings are restored when you exit the 'with' block)
with pd.option_context('max_rows', None, 'max_columns', None):
    print drinks
drinks.loc[drinks.beer_servings.between(201, 400), "beer_level"] = "high"    # change 201-400 to 'high'

# display a cross-tabulation of two Series
pd.crosstab(drinks.continent, drinks.beer_level)

# convert 'beer_level' into the 'category' data type (new in pandas 0.15.0)
drinks["beer_level"] = pd.Categorical(drinks.beer_level, categories=["low", "med", "high"])
drinks.sort_index(by="beer_level")    # sorts by the categorical ordering (low to high)

# create dummy variables for 'continent' and add them to the DataFrame
cont_dummies = pd.get_dummies(drinks.continent, prefix="cont").iloc[:, 1:]    # exclude first column
drinks = pd.concat([drinks, cont_dummies], axis=1)    # axis=0 for rows, axis=1 for columns

# randomly sample a DataFrame
mask = np.random.rand(len(drinks)) < 0.66    # create a Series of booleans
train = drinks[mask]                         # will contain about 66% of the rows
test = drinks[~mask]                         # will contain the remaining rows

# change the maximum number of rows and columns printed ('None' means unlimited)
pd.set_option("max_rows", None)     # default is 60 rows
pd.set_option("max_columns", None)  # default is 20 columns
print drinks

# reset options to defaults
pd.reset_option("max_rows")
pd.reset_option("max_columns")

# change the options temporarily (settings are restored when you exit the 'with' block)
with pd.option_context("max_rows", None, "max_columns", None):
    print drinks
# widen the column display
pd.set_option('max_colwidth', 500)

# negative sentiment in a 5-star review
yelp[(yelp.stars == 5) & (yelp.sentiment < -0.3)].head(1)

# positive sentiment in a 1-star review
yelp[(yelp.stars == 1) & (yelp.sentiment > 0.5)].head(1)

# reset the column display width
pd.reset_option('max_colwidth')

# ## Bonus: Adding Features to a Document-Term Matrix

# create a DataFrame that only contains the 5-star and 1-star reviews
yelp_best_worst = yelp[(yelp.stars == 5) | (yelp.stars == 1)]

# define X and y
feature_cols = ['text', 'sentiment', 'cool', 'useful', 'funny']
X = yelp_best_worst[feature_cols]
y = yelp_best_worst.stars

# split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
def print_full(x, y):
    pandas.set_option('display.max_rows', y)
    print(x)
    pandas.reset_option('display.max_rows')
def full_print(df):
    import pandas as pd
    pd.set_option('display.max_rows', len(df))
    print df
    pd.reset_option('display.max_rows')
def print_full(x):
    pd.set_option('display.max_rows', len(x), 'display.max_columns', len(x.columns))
    print(x)
    pd.reset_option('display.max_rows', 'display.max_columns')
            item = {u'stock_number': i.stock_number,
                    u'stock_name': i.stock_name.encode('utf-8'),
                    u'increase_rate': sdt.increase_rate,
                    u'today_closing_price': sdt.today_closing_price}
            plate_stocks.append(item)

    plate_stocks = sorted(plate_stocks,
                          key=lambda stock: float(stock.get('increase_rate').replace('%', '')),
                          reverse=True)
    print market_plate
    print len(plate_stocks)
    if len(plate_stocks):
        print '---------------------------------------------------'
        frame = DataFrame(plate_stocks).set_index('stock_number').reindex(
            columns=['stock_name', 'today_closing_price', 'increase_rate'])
        pd.set_option('display.max_rows', len(plate_stocks))
        print frame
        pd.reset_option('display.max_rows')


def setup_argparse():
    parser = argparse.ArgumentParser(description=u'查询某个板块所对应的股票')
    parser.add_argument(u'-m', action=u'store', dest='market_plate', required=True, help=u'需要查询的板块')
    parser.add_argument(u'-f', action=u'store_true', dest='filter_rh', help=u'如果添加这个参数,则在结果里会过滤瑞华的客户')
    args = parser.parse_args()
    return args.market_plate, args.filter_rh


if __name__ == '__main__':
    setup_logging(__file__, logging.WARNING)
    market_plate, filter_rh = setup_argparse()
    if isinstance(market_plate, str):
def print_full(x):
    # print all rows of a pandas dataframe
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
def print_full(x):
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows')
def print_full(x):
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    print(x)
    pd.reset_option('display.max_rows')
    pd.reset_option('display.max_columns')
def main(args=None):
    config = xe.get_config(args.config)
    session = xe.get_xnat_session(config)
    if args.update:
        # Update the cache of XNAT Experiment XML files
        xe.extract_experiment_xml(config, session, args.experimentsdir, args.num_extract)

    # extract info from the experiment XML files
    experiment = xe.get_experiments_dir_info(args.experimentsdir)
    experiment_df = xe.experiments_to_dataframe(experiment)
    reading = xe.get_experiments_dir_reading_info(args.experimentsdir)
    reading_df = xe.reading_to_dataframe(reading)
    experiment_reading = inner_join_dataframes(experiment_df, reading_df)

    # exclude phantoms, but include the traveling human phantoms
    site_id_pattern = '[A-EX]-[0-9]{5}-[MFT]-[0-9]'
    df = experiment_reading[experiment_reading.site_id.str.contains(site_id_pattern)]

    result = None
    if args.report_type == 'no_findings_date':
        # Findings are listed without a findings date
        result = findings_date_empty(df)
        if args.set_findings_date:
            # Update the findings date to equal the date to dvd
            update_findings_date(args.config, result)
    elif args.report_type == 'no_findings':
        # Findings is empty but a date is listed
        result = findings_empty(df)
    elif args.report_type == 'no_findings_or_date':
        # Both the findings and findings date are empty
        result = findings_and_date_empty(df)
        if args.reset_datetodvd:
            record = result[result.experiment_id == experiment]
            project = record.project.values[0]
            subject = record.subject_id.values[0]
            experiment = args.reset_datetodvd
            set_experiment_attrs(args.config, project, subject, experiment, 'datetodvd', 'none')
    elif args.report_type == 'correct_dvd_date':
        dates_df = pd.read_csv(args.file_to_reset_datetodvd)
        result = pd.DataFrame(index=['Subject'],
                              columns=['project', 'subject_id', 'experiment_id',
                                       'site_experiment_id', 'datetodvd', 'findingsdate'])
        result = result.fillna(0)
        for subject in df['subject_id'].tolist():
            if subject in dates_df['mri_xnat_sid'].tolist():
                if args.verbose:
                    print "Checking for {}".format(subject)
                eids = dates_df[dates_df['mri_xnat_sid'] == subject]['mri_xnat_eids'].tolist()
                date = dates_df[dates_df['mri_xnat_sid'] == subject]['mri_datetodvd'].tolist()
                if eids != []:
                    if len(eids[0]) == 13:
                        experiment = eids[0]
                        record = df[df.experiment_id == experiment]
                        record_date = record['datetodvd'].tolist()
                        if date != [] and record_date != []:
                            if record_date[0] != date[0] or type(record_date[0]) != str():
                                project = record.project.values[0]
                                subject = record.subject_id.values[0]
                                experiment = record.experiment_id.values[0]
                                set_experiment_attrs(args.config, project, subject, experiment,
                                                     'datetodvd', date[0])
                    elif len(eids[0]) == 27 or eids == None:
                        experiment = eids[0].split(" ")
                        for e in experiment:
                            record_date = record['datetodvd'].tolist()
                            record = df[df.experiment_id == e]
                            if date != [] and record_date != []:
                                if record_date[0] != date[0] or type(record_date[0]) == str():
                                    project = record.project.values[0]
                                    subject = record.subject_id.values[0]
                                    set_experiment_attrs(args.config, project, subject, e,
                                                         'datetodvd', date[0])
    elif args.report_type == 'no_findings_before_date':
        # Findings and Findings Date are empty before a given date
        if not args.before_date:
            raise(Exception("Please set --before-date YYYY-MM-DD when running "
                            "the no_findings_before_date report."))
        has_dvd_before_date = check_dvdtodate_before_date(df, before_date=args.before_date)
        result = findings_and_date_empty(has_dvd_before_date)
        result.to_csv(args.outfile, index=False)
    else:
        raise(NotImplementedError("The report you entered is not in the list."))

    result.to_csv(args.outfile,
                  columns=['project', 'subject_id', 'experiment_id',
                           'site_experiment_id', 'datetodvd', 'findingsdate'],
                  index=False)

    if args.verbose:
        pd.set_option('display.max_rows', len(result))
        print("Total records found: {}".format(len(result)))
        print(result[['experiment_id', 'site_experiment_id']])
        pd.reset_option('display.max_rows')
    print("Finished!")