def main():
    # Zacks = DataBox(zacksfile, zackscolfixes, zackscoldeletes)
    # print Calscores()
    pd.show_versions()
def info_pandas_dependencies(self) -> str:
    """Return versions of pandas dependencies as a string."""
    from contextlib import redirect_stdout
    from io import StringIO

    # pandas show_versions prints output directly. Redirect this output
    # to a string for further processing.
    versions = StringIO()
    with redirect_stdout(versions):
        pd.show_versions(as_json=False)
    result = versions.getvalue().replace(" ", "")

    # Put items in dictionary for easy processing.
    result_dict = {}
    for line in result.splitlines():
        if line.find(":") != -1:
            items = line.split(":")
            result_dict[items[0]] = items[1]

    # Construct result string (key : value for each pair in dict)
    result_string = ""
    key_max_len = len(sorted(result_dict, key=len, reverse=True)[0])
    for key in sorted(result_dict, key=str.lower):
        result_string += key.ljust(key_max_len + 1) + ": " + result_dict[key] + "\n"
    return result_string.strip("\n")
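# A minimal driver for the method above -- a sketch, not from the source. The
# method never uses `self`, so it can be called directly with any placeholder:
import pandas as pd

print(info_pandas_dependencies(None))  # placeholder stands in for `self`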
def show_version():
    """Print `supy` and `supy_driver` version information."""
    print('SuPy versions')
    print('-------------')
    print(f"supy: {__version__}")
    print(f"supy_driver: {__version_driver__}")
    print('\n=================')
    print('SYSTEM DEPENDENCY')
    pd.show_versions()
def test_json_output_match(capsys, tmpdir):
    # GH39701
    pd.show_versions(as_json=True)
    result_console = capsys.readouterr().out

    out_path = os.path.join(tmpdir, "test_json.json")
    pd.show_versions(as_json=out_path)
    with open(out_path) as out_fd:
        result_file = out_fd.read()

    assert result_console == result_file
def cache_info(cls):
    f = io.StringIO()
    with contextlib.redirect_stdout(f):
        try:
            pd.show_versions()
        except Exception as e:
            msg = ('Unable to collect pandas related information, '
                   'pandas.show_versions raises {}({})')
            logger.error(msg.format(e.__class__.__name__, e))
    cls._CACHE = f.getvalue()
def gather_env_info():
    # TODO get RAM and hard drive info
    config = {
        "os_hardware": {},
        "python": {},
        "python_libs": {},
        "mssql": {},
        "mssql_tools": {},
        "docker": {},
    }

    # OS and hardware info
    config["os_hardware"] = {
        i: getattr(platform.uname(), i)
        for i in ["system", "release", "version", "machine", "processor"]
    }
    config["os_hardware"]["python_compiler"] = platform.python_compiler()

    # Python
    config["python"] = {k: v for k, v in sys.implementation.__dict__.items()}

    # Python libraries
    with capture_stdout() as mystdout:
        pd.show_versions(as_json=True)
    pdv = mystdout.getvalue().replace("'", '"').replace("None", "null")
    pdv_j = json.loads(pdv)
    config["python_libs"]["pandas_versions"] = pdv_j

    # MSSQL
    config["mssql"] = {"docker-image": mssql_image, "MSSQL_PID": "Express"}

    # Sql Tools
    cmd_bcp = ["bcp", "-v"]
    res = run(_parse_cmd(cmd_bcp), stdout=PIPE, stderr=PIPE, shell=with_shell)
    if res.returncode == 0:
        config["mssql_tools"] = {
            "bcp-version": res.stdout.decode().strip().split("\r\n")
        }

    # Docker
    cmd_docker = ["docker", "version", "--format", "'{{json .}}'"]
    res = run(_parse_cmd(cmd_docker), stdout=PIPE, stderr=PIPE, shell=with_shell)
    if res.returncode == 0:
        docker_out = res.stdout.decode().strip()[1:-1]  # strip outer single quotes
        config["docker"] = {"docker-version-output": json.loads(docker_out)}

    return config
def lambda_handler(event, context):
    print(pd.__version__)
    pd.show_versions()

    post_url = 'https://jsonplaceholder.typicode.com/posts'
    comments_url = 'https://jsonplaceholder.typicode.com/comments'
    file_name = 'Posts_Comments.json'
    bucket_name = 'xl-post'

    try:
        step = 0
        posts = pd.read_json(post_url)
        step += 1
        comments = pd.read_json(comments_url)
    except Exception as e:
        print(f'Failed after Step : {step}')
        print(e)

    post_comment = pd.merge(posts, comments, how='outer',
                            left_on='id', right_on='postId')
    post_comment = post_comment.rename(
        columns={
            'body_x': 'post_body',
            'id_x': 'post_id',
            'body_y': 'comments_body',
            'id_y': 'comments_id',
            'postId': 'comment_postId'
        })
    data_frame = post_comment[[
        'userId', 'post_body', 'title', 'comment_postId', 'comments_body',
        'email', 'name'
    ]]
    Final_data = (data_frame.groupby(
        ['userId', 'post_body', 'title'], as_index=True).apply(
            lambda x: x[['comment_postId', 'comments_body', 'email', 'name']
                        ].to_dict('r')).reset_index().rename(
                            columns={0: 'comments'}).to_json(orient='records'))
    Final_json = json.dumps(json.loads(Final_data), indent=2, sort_keys=False)

    with open("/tmp/" + file_name, "w") as outfile:
        outfile.write(Final_json)

    s3_client = boto3.client('s3')
    response = s3_client.upload_file('/tmp/' + file_name, bucket_name, file_name)

    return {
        'statusCode': 200,
        'body': json.dumps('Post and Comments merged and posted as a JSON to S3 at '
                           + str(datetime.datetime.now()))
    }
def main_func(argv: List[str]):
    """
    Main function that should handle all the top-level processing for this program

    :param argv: List of arguments passed to the program (meant to be sys.argv)
    """

    # Perform argument parsing and program setup
    parsed_args, user_args = setup_func(argv, get_mama_parser)

    # Set Numpy error handling to shunt error messages to a logging function
    np.seterr(all='call')
    np.seterrcall(numpy_err_handler)

    # Attempt to print package version info (pandas has a nice version info summary)
    if logging.root.level <= logging.DEBUG:
        logging.debug("Printing Pandas' version summary:")
        with contextlib.redirect_stdout(io.StringIO()) as f:
            pd.show_versions()
        logging.debug("%s\n", f.getvalue())

    # Execute the rest of the program, but catch and log exceptions before failing
    try:
        # Validate user inputs and create internal dictionary
        iargs = validate_inputs(parsed_args, user_args)

        # Run the MAMA pipeline
        result_sumstats = mama_pipeline(
            iargs[SUMSTATS_MAP], iargs['ld_scores'], iargs['snp_list'],
            iargs[COL_MAP], iargs[RE_MAP], iargs[FILTER_MAP],
            iargs[REG_LD_COEF_OPT], iargs[REG_SE2_COEF_OPT],
            iargs[REG_INT_COEF_OPT], iargs[REG_LD_COEF_SCALE_COEF],
            iargs['use_standardized_units'], iargs[HARM_FILENAME_FSTR],
            iargs[REG_FILENAME_FSTR], iargs['input_sep'])

        # Write out the summary statistics to disk
        logging.info("Writing results to disk.")
        for (ancestry, phenotype), ss_df in result_sumstats.items():
            filename = f"{iargs['out']}_{ancestry}_{phenotype}{RESULTS_SUFFIX}"
            logging.info("\t%s", filename)
            write_sumstats_to_file(filename, ss_df)

        # Log any remaining information TODO(jonbjala) Timing info?
        logging.info("\nExecution complete.\n")

    # Disable pylint error since we do actually want to capture all exceptions here
    except Exception as exc:  # pylint: disable=broad-except
        logging.exception(exc)
        sys.exit(1)
def test_show_versions_console_json(capsys):
    # GH39701
    pd.show_versions(as_json=True)
    stdout = capsys.readouterr().out

    # check valid json is printed to the console if as_json is True
    result = json.loads(stdout)

    # Basic check that each version element is found in output
    expected = {
        "system": _get_sys_info(),
        "dependencies": _get_dependency_info(),
    }
    assert result == expected
def show_versions():
    """Prints versions of various dependencies"""
    output = OrderedDict()
    output["Date"] = str(datetime.datetime.now())

    import sys
    import platform
    output["Platform"] = str(platform.platform())
    system_information = sys.version_info
    output["System version"] = "{}.{}".format(system_information.major,
                                              system_information.minor)

    PACKAGES = ["nilmtk", "nilm_metadata", "numpy", "matplotlib", "pandas",
                "sklearn", "hmmlearn"]
    for package_name in PACKAGES:
        key = package_name + " version"
        try:
            exec("import " + package_name)
        except ImportError:
            output[key] = "Not found"
        else:
            output[key] = eval(package_name + ".__version__")

    try:
        print(pd.show_versions())
    except:
        pass
    else:
        print("")

    # dict.iteritems() is Python 2 only; use items() on Python 3.
    for k, v in output.items():
        print("{}: {}".format(k, v))
def main():
    if len(sys.argv) == 4:
        print(pd.show_versions())

        messages_filepath, categories_filepath, database_filepath = sys.argv[1:]

        print('Loading data...\n    MESSAGES: {}\n    CATEGORIES: {}'.format(
            messages_filepath, categories_filepath))
        df = load_data(messages_filepath, categories_filepath)

        print('Cleaning data...')
        df = clean_data(df)

        print('Saving data...\n    DATABASE: {}'.format(database_filepath))
        save_data(df, database_filepath)

        print('Cleaned data saved to database!')
    else:
        print('Please provide the filepaths of the messages and categories '
              'datasets as the first and second argument respectively, as '
              'well as the filepath of the database to save the cleaned data '
              'to as the third argument. \n\nExample: python process_data.py '
              'disaster_messages.csv disaster_categories.csv '
              'DisasterResponse.db')
def test_show_versions(tmpdir):
    # GH39701
    as_json = os.path.join(tmpdir, "test_output.json")
    pd.show_versions(as_json=as_json)

    with open(as_json) as fd:
        # check if file output is valid JSON, will raise an exception if not
        result = json.load(fd)

    # Basic check that each version element is found in output
    expected = {
        "system": _get_sys_info(),
        "dependencies": _get_dependency_info(),
    }
    assert result == expected
def test_show_versions(capsys):
    # gh-32041
    pd.show_versions()
    captured = capsys.readouterr()
    result = captured.out

    # check header
    assert "INSTALLED VERSIONS" in result

    # check full commit hash
    assert re.search(r"commit\s*:\s[0-9a-f]{40}\n", result)

    # check required dependency
    assert re.search(r"numpy\s*:\s([0-9\.\+a-f]|dev)+\n", result)

    # check optional dependency
    assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result)
def test_show_versions_console(capsys):
    # gh-32041
    pd.show_versions(as_json=False)
    result = capsys.readouterr().out

    # check header
    assert "INSTALLED VERSIONS" in result

    # check full commit hash
    assert re.search(r"commit\s*:\s[0-9a-f]{40}\n", result)

    # check required dependency
    # 2020-12-09 npdev has "dirty" in the tag
    assert re.search(r"numpy\s*:\s([0-9\.\+a-g\_]|dev)+(dirty)?\n", result)

    # check optional dependency
    assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result)
def test_show_versions_console(capsys):
    # gh-32041
    pd.show_versions(as_json=False)
    result = capsys.readouterr().out

    # check header
    assert "INSTALLED VERSIONS" in result

    # check full commit hash
    assert re.search(r"commit\s*:\s[0-9a-f]{40}\n", result)

    # check required dependency
    # 2020-12-09 npdev has "dirty" in the tag
    # 2022-05-25 npdev released with RC wo/ "dirty".
    # Just ensure we match [0-9]+\..* since npdev version is variable
    assert re.search(r"numpy\s*:\s[0-9]+\..*\n", result)

    # check optional dependency
    assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result)
def pandas_df_save_demo():
    data_dict = {
        'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
        'year': [2000, 2001, 2002, 2001, 2002],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
    }
    dir_path = os.path.join(os.getenv('HOME'), 'Downloads/tmp_files')

    # save as csv
    df1 = DataFrame(data_dict, index=range(1, 6))
    print('dataframe to be saved as csv:\n', df1)
    df1.to_csv(os.path.join(dir_path, 'test_df.csv'))

    # save as parquet
    # pre-condition: pip3 install fastparquet
    pd.show_versions()
    df2 = DataFrame(data_dict)
    print('\ndataframe to be saved as parquet:\n', df2)
    # df2.to_parquet(os.path.join(dir_path, 'test_df.parquet.gzip'))
    # RuntimeError: Compression 'snappy' not available. Options: ['GZIP', 'UNCOMPRESSED']
    df2.to_parquet(os.path.join(dir_path, 'test_df.parquet.gzip'),
                   engine='fastparquet', compression='gzip')
def predict():
    form = PredictForm()
    if form.submit():
        SearchStr = form.num1.data
        TOD = "Business Logic"
        k = 0
        for a in df.index:
            X = df['Desc'][a]
            # tokenization
            X_list = word_tokenize(X.lower())
            Y_list = word_tokenize(SearchStr.lower())
            # Fetching all stop words
            sw = stopwords.words('english')
            V1 = []
            V2 = []
            # Stop word removal
            X_set = {lemmatizer.lemmatize(w) for w in X_list if w not in sw}
            Y_set = {lemmatizer.lemmatize(w) for w in Y_list if w not in sw}
            UV = X_set.union(Y_set)
            for w in UV:
                if w in X_set:
                    V1.append(1)
                else:
                    V1.append(0)
                if w in Y_set:
                    V2.append(1)
                else:
                    V2.append(0)
            c = 0
            # Calculating cosine similarity
            for i in range(len(UV)):
                c += V1[i] * V2[i]
            cosine = c / float((sum(V1) * sum(V2)) ** 0.5)
            Final.loc[Final['Defect_desc'] == X, 'Similarity'] = cosine

        df_Final = Final.copy()
        # sum = form.num1.data + form.num2.data
        df_Final = Final[(Final['Similarity'] > 0) &
                         (Final['Type of Defect'] == TOD)].sort_values(
                             by='Similarity', ascending=False).head(3)
        # print(df_Final)
        df_Final = pd.DataFrame.to_html(df_Final)
        # print(df_Final)
        form.abc = df_Final
        # print(form.abc)
        pd.show_versions()
    return render_template('index.html', form=form)
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 19 07:48:13 2017

@author: baradhwaj
"""
# Objectives: (indexing, grouping, aggregating, cleaning)

# 1. Category: Getting started and checking pandas setup
# Q1: Import pandas
import pandas as pd
import numpy as np

# Q2: Print the version of pandas that has been imported.
pd.__version__

# Q3: Print out all the version information of the libraries that are
# required by the pandas library.
print(pd.show_versions())

# 2. DataFrame basics
# A few of the fundamental routines for selecting, sorting, adding and
# aggregating data in DataFrames
# Input:
data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

# Q4: Create a DataFrame df from this dictionary data which has the index labels.
df = pd.DataFrame(data, index=labels)

# Q5: Display a summary of the basic information about this DataFrame and its data.
df.info()

# Q6: Return the first 3 rows of the DataFrame df.
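# Q6 ends without an answer in this snippet; a plausible answer in the same
# style (not from the source), using the df defined just above:
df.head(3)    # first 3 rows
df.iloc[:3]   # equivalent, by position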
        # end, stations
        trip_duration_stats(df)  # function to calculate total travel time
        user_stats(df)

        count = 1  # counter used to multiply the count number * 5
        # to display 5 more rows each time
        while True:
            print('Would you like to see 5 rows of raw data? \n')
            sleep(1)
            answer = input('Please enter yes or no: \n')
            answer = answer.lower()
            if answer == 'yes' or answer == 'y' or answer == 'ye':
                print(df.head(count * 5))  # used to print 5 more rows of the raw data.
                sleep(1)
                count += 1
            else:
                break

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        sleep(1)
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
    main()
    pd.show_versions()  # to show versions of packages and libraries used.
import pandas as pd

print(pd.__version__)
print(pd.show_versions(as_json=True))
import matplotlib.pyplot as plt
import pandas as pd
# http://pandas.pydata.org/pandas-docs/stable/10min.html
# https://github.com/pandas-dev/pandas/blob/master/doc/cheatsheet/Pandas_Cheat_Sheet.pdf ==> Pandas cheatsheet
# https://jeffdelaney.me/blog/useful-snippets-in-pandas/
import numpy as np
import re
import os

########### Display options of spyder, jupyter, terminal ###################
pd.show_versions()                           # show all installed library versions
pd.describe_option()                         # shows all options of HOW TO DISPLAY
pd.get_option('display.max_rows')
pd.set_option('display.max_rows', None)      # unlimited display of rows
pd.reset_option('display.max_rows')
pd.get_option('display.max_columns')
pd.set_option('display.max_columns', None)   # unlimited display of columns
pd.reset_option('display.max_columns')

# In a Jupyter notebook, press "Shift+Tab" twice on a function to see all its attributes.
#################################### Canopy Data Import ==> a tool to generate python code from an auto-imported CSV file
# PANDAS is built on NUMPY.
# A DataFrame is a grid-like representation of data.
# A Series is part of a DataFrame (one column of data is a SERIES OBJECT);
# in other words, a group of SERIES makes a DATAFRAME.
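# A small illustration of the Series/DataFrame relationship described in the
# comments above (the column names are made up for this example):
import pandas as pd

df = pd.DataFrame({'city': ['Pune', 'Delhi'], 'temp': [31, 28]})
print(type(df))          # <class 'pandas.core.frame.DataFrame'>
print(type(df['temp']))  # each column is a <class 'pandas.core.series.Series'>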
# Topic: Version
# -----------------------------
# libraries
import pandas as pd

pd.__version__
pd.show_versions(as_json=False)

!pip list
!pip show pandas
!pip freeze

#%%% PIP
#!python -m pip install --upgrade pip
!pip --version
!conda update pip  # upgrade

# https://pandas.pydata.org/pandas-docs/stable/install.html
!pip install --upgrade pandas
!pip3 install --upgrade pandas
!conda update pandas

# Windows (run from a command prompt, not from Python)
# python -c "import pandas as pd; print(pd.__version__)"
# conda list | findstr pandas       # Anaconda / Conda
# pip freeze | findstr pandas
# pip show pandas | findstr Version
import pandas
import numpy
import configparser

config = configparser.ConfigParser()
print(config.read('example.ini'))
print(config.sections())

print([numpy.random.randint(10) for i in range(10)])
print('Hi my friend')
[print(i) for i in range(10)]

pandas.show_versions()
def unitily():
    # pd.show_versions() prints its report to stdout and returns None,
    # so `version` here will be None.
    version = pd.show_versions()
    print(version)
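# If the goal is to capture the version report as text rather than print None,
# a sketch using the same stdout-redirect pattern as the earlier snippets:
import io
import contextlib
import pandas as pd

def show_versions_text() -> str:
    # Capture what pd.show_versions() writes to stdout, since it returns None.
    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        pd.show_versions()
    return buf.getvalue()

print(show_versions_text())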
import pandas as pd

b) Print the version of pandas that has been imported.

pd.__version__

import sys
for name, module in sorted(sys.modules.items()):
    if hasattr(module, '__version__'):
        print(name, module.__version__)

c) Print out all the version information of the libraries that are required by the pandas library.

pd.show_versions()

2. Consider the following Python dictionary data and Python list labels:

data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

a) Create a DataFrame df from this dictionary data which has the index labels.

df = pd.DataFrame(data, index=labels)
print(df)
Importing pandas

Getting started and checking your pandas setup

Difficulty: easy

1. Import pandas under the name pd.

In [ ]: import pandas as pd

2. Print the version of pandas that has been imported.

In [ ]: pd.__version__

3. Print out all the version information of the libraries that are required by the pandas library.

In [ ]: pd.show_versions()

DataFrame basics

A few of the fundamental routines for selecting, sorting, adding and aggregating data in DataFrames

Difficulty: easy

Note: remember to import numpy using: import numpy as np

Consider the following Python dictionary data and Python list labels:

data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
import pandas as pd

pd.show_versions()  # show pandas as well as dependencies versions
pd.show_versions(as_json=True)
print(pd.__version__)

# dataframes is a 2D array
# series is a 1D array of indexed data
# pandas support both index and label based indexing

# accessing a single series
# df['column_name'] or df["column_name"] or df.column_name
# dot notation won't work if there is a space in the column name

# accessing multiple series
# df[['column_name1', 'column_name2']]

# type(DataFrame)            Output: pandas.core.frame.DataFrame
# type(DataFrame.SeriesName) Output: pandas.core.series.Series

df = pd.read_csv('olympics.csv', skiprows=4)  # skip the first 4 rows
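# A self-contained illustration of the access patterns described in the
# comments above (column names are invented; nothing is assumed about olympics.csv):
import pandas as pd

demo = pd.DataFrame({'team': ['A', 'B'], 'gold medals': [3, 1], 'year': [2016, 2016]})
print(demo['team'])             # bracket access works for any column name
print(demo.team)                # dot access works only for valid identifiers
print(demo['gold medals'])      # column name with a space: brackets are required
print(demo[['team', 'year']])   # list of names selects multiple columns (a DataFrame)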
def show_version():
    return pd.show_versions()
import numpy as np
import pandas as pd

# 101 Pandas Exercises

# 1 Show Pandas Version
pd.__version__
pd.show_versions(as_json=True)

# 2 Create a series from a list, numpy array, and dictionary
mylist = list("abcdefghijklmnopqrstuvwxyz")
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
from_list = pd.Series(mylist)
from_array = pd.Series(myarr)
from_dictionary = pd.Series(mydict)

# 3 Convert series <ser> into a DataFrame, add its index as a column
data = pd.DataFrame(zip(from_dictionary, from_dictionary.index))  # my solution
data = from_dictionary.to_frame().reset_index()
data = from_dictionary.to_frame().reset_index(drop=True)  # avoid saving old index

# 4 Combine multiple series into a dataframe
ser1 = pd.Series(list("abcdefghijklmnopqrstuvwxyz"))
ser2 = pd.Series(np.arange(len(ser1)))
data = pd.DataFrame({"ser1": ser1, "ser2": ser2})  # my solution
data = pd.concat([ser1, ser2], axis=0)  # append ser1 by ser2
data = pd.concat([ser1, ser2], axis=1)  # cbind ser1 by ser2

# 5 Assign a name to the series
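# The snippet cuts off at exercise 5; a plausible answer in the same style
# (the name "alphabet" is just an example, not from the source):
import pandas as pd

ser = pd.Series(list("abc"))
ser.name = "alphabet"           # assign a name in place
ser = ser.rename("alphabet")    # or return a renamed copy
print(ser.name)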
def main():
    print(sys.executable)
    print(pd.show_versions())
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys
import os

# for warning suppression
import warnings

# for generating GH links with linenumbers
import inspect

import pandas as pd

pd.show_versions()

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('../sphinxext'))
sys.path.insert(0, os.path.abspath('../../../'))

# -- General configuration ------------------------------------------------

# use napoleon in lieu of numpydoc 2019-04-23

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
import os
import sys

# Mock modules so RTD works
try:
    from mock import Mock as MagicMock
except ImportError:
    from unittest.mock import MagicMock


class Mock(MagicMock):
    @classmethod
    def __getattr__(cls, name):
        return Mock()


MOCK_MODULES = []
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)

import pandas as pd

pd.show_versions()

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('../sphinxext'))
sys.path.insert(0, os.path.abspath('../../../'))

# -- General configuration ------------------------------------------------

# turns off numpydoc autosummary warnings
numpydoc_show_class_members = False

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
import orderText
import sys
import pandas as pd

# printing package info
print(sys.version)
pd.show_versions(as_json=False)

# test run of orderText removeDuplicates function
unique_list = orderText.removeDuplicates(
    ['first.xml', 'first.xml', 'second.xml', 'second.xml', 'third.xml'])
print(unique_list)

# test run of readText function
listInFile = orderText.readText('sample.txt')
print(listInFile)