Python for_file Exemples

Langage de programmation: Python

Espace de noms/Paquet : lib_cinci.data_folder

Méthode/Fonction: for_file

Exemples sur hotexamples.com : 6

Python for_file - 6 exemples trouvés. Ce sont les exemples réels les mieux notés de lib_cinci.data_folder.for_file extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Associées

dict_into_table

SQL_STATEMENTS

start

Container

process_logs

Ui_tablePreviewDialog

install_gnu

_jinja2_filter_reversed

droidGetId

read_votes

Associés dans d'autres langages

User_Model_Acl_Resource (PHP)

base64_pack (PHP)

DistributorBusiness (C#)

RepositorioMarcas (C#)

setupWindows (C++)

iupTableGet (C++)

LinuxPodSandboxConfig (Go)

Join (Go)

ScholarUserDao (Java)

Label (Java)

Exemple #1

0

Afficher le fichier

Fichier : clean.py Projet : conorhenley/cincinnati

# Cleaning script for the fire incidents CSV: loads tmp/fire.csv from the
# data folder associated with this file, normalizes column names, and drops
# rows without an address as well as exact duplicate rows.
import os

import pandas as pd

from lib_cinci import data_folder

# All I/O happens inside the 'tmp' subfolder of this script's data folder.
path_to_data_folder = data_folder.for_file(__file__)
os.chdir(os.path.join(path_to_data_folder, 'tmp'))
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Working in folder: {}'.format(path_to_data_folder))

# Load csv file, parse the incident_date column as dates
df = pd.read_csv("fire.csv", parse_dates=['incident_date'])
print('Raw file has {:,d} rows and {:,d} columns'.format(*df.shape))

# Lowercase column names
df.columns = df.columns.map(lambda s: s.lower())
df.rename(columns={'street_address': 'address'}, inplace=True)

# Check how many rows have empty addresses
print('{:,d} rows with empty address, removing those'.format(
    df.address.isnull().sum()))

# Remove rows without address
df = df[df.address.notnull()]

# Check for duplicates (rows identical across every column)
duplicates = df.duplicated()
n_duplicates = duplicates.sum()
print('Found {:,d} duplicates, dropping them'.format(n_duplicates))
df = df[~duplicates]

# Strip some columns
# csvsql has a bug that is not producing the
# (the original comment is cut off at this point in the excerpt)

Exemple #2

0

Afficher le fichier

Fichier : parse_csv_tax_file.py Projet : dssg/cincinnati

# Excerpt of a tax-file parsing script: reads the column names for the
# requested year from definitions.yaml (located next to this script), then
# switches to the data folder and loads the raw file with every column as str.
year = int(sys.argv[2])

# Set folder where this file is located as working directory
script_dir = os.path.abspath(os.path.dirname(__file__))
os.chdir(script_dir)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Loading definitions.yaml from: %s" % os.getcwd())
# NOTE(review): yaml.load without an explicit Loader can construct arbitrary
# Python objects; definitions.yaml is read from the script's own folder, but
# consider yaml.safe_load if the file only holds plain data.
with open("definitions.yaml") as f:
    definitions = yaml.load(f.read())
names = definitions["names"][year]

# Move current directory so all I/O operations take place in the
# corresponding data folder.
# NOTE(review): this rebinds the name `data_folder`, shadowing the module it
# was called on; kept as-is because code outside this excerpt may use it.
data_folder = data_folder.for_file(__file__)
os.chdir(data_folder)
print("Changing working dir to: %s" % os.getcwd())

# Create tmp folder if it does not exist
if not os.path.exists("tmp"):
    print("Creating tmp folder in %s" % os.getcwd())
    os.makedirs("tmp")

print("Loading data from %d..." % year)
# Force all columns to be read as strings to prevent pandas eliminating
# leading 0s and other weird stuff. There are some columns with only one
# blank space, interpret those as NA.
# `np.str` was only an alias of the builtin `str` and no longer exists in
# current NumPy, so the builtin is used directly.
# `input_file` is defined outside this excerpt.
df = pd.read_csv(input_file, names=names, dtype=str, na_values=[" "])

Exemple #3

0

Afficher le fichier

Fichier : clean.py Projet : TorontoDataScientistsWithoutBorders/cincinnati

# Cleaning script for the crime CSV: loads tmp/diff_crime.csv from the data
# folder associated with this file, indexes rows by occurrence date, renames
# a few columns, and keeps only records from 2012 onwards.
import os
import sys

import pandas as pd

from lib_cinci import data_folder

# All I/O happens inside the 'tmp' subfolder of this script's data folder.
path_to_data_folder = data_folder.for_file(__file__)
os.chdir(os.path.join(path_to_data_folder, 'tmp'))
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Working in folder: %s' % path_to_data_folder)

input_filename = "diff_crime.csv"
output_filename = "diff_crime_clean.csv"

# Load csv file; dtype=object keeps every column as read, without inference
df = pd.read_csv(input_filename, dtype=object)
print('Raw file has {:,d} rows and {:,d} columns'.format(*df.shape))

# Lowercase column names
df.columns = df.columns.map(lambda s: s.lower())

# Use the parsed occurrence timestamp as the index
df['occurredon'] = pd.to_datetime(df['occurredon'])
df = df.set_index('occurredon')

# Rename some columns
mapping = {'location': 'address', 'addressstate': 'state'}
df.index.rename('occurred_on', inplace=True)
df.rename(columns=mapping, inplace=True)

# We are only using data starting from 2012
df = df[df.index.year >= 2012]
print('Subset from 2012 has {:,d} rows and {:,d} columns'.format(*df.shape))

Exemple #4

0

Afficher le fichier

# Excerpt of a script that works on per-year "owners_<year>_resolved.csv"
# files inside the 'tmp' subfolder of this file's data folder.
import os

import pandas as pd

from lib_cinci import data_folder

# Move current directory so all I/O operations take place in the
# corresponding data folder.
# NOTE(review): this rebinds the name `data_folder`, shadowing the imported
# module; kept as-is because code outside this excerpt may use it.
data_folder = data_folder.for_file(__file__)
os.chdir(data_folder)

# Create tmp folder if it does not exist
if not os.path.exists('tmp'):
    print('Creating tmp folder in %s' % os.getcwd())
    os.makedirs('tmp')

# Move to tmp folder
os.chdir('tmp')
print('Changing working dir to: %s' % os.getcwd())


def read_for_year(year):
    # Load the resolved owners file for `year`, keep one row per parcel_id
    # and index the frame by parcel_id.
    df = pd.read_csv("owners_{}_resolved.csv".format(year))
    df["parcel_id"] = df["parcel_id"].astype(str)
    # dupes = df.groupby(level=0).filter(lambda x: len(x) > 1)
    df = df.drop_duplicates(subset='parcel_id')
    df = df.set_index("parcel_id")
    # Name the entity column after the year it belongs to.
    entities = df["entity"]
    entities.name = "owner_" + str(year)
    # NOTE(review): the excerpt ends here; the remainder of this function is
    # not visible in the source.

Exemple #5

0

Afficher le fichier

Fichier : update.py Projet : TorontoDataScientistsWithoutBorders/cincinnati

# Excerpt of an update script driven by a yaml configuration file: reads the
# configuration, switches to the matching data folder, and optionally
# downloads the source file when the configuration provides a URL.

# Step zero: read from yaml file
parser = argparse.ArgumentParser()
parser.add_argument("update_file",
                    help="Path to yaml file with configuration parameters")
args = parser.parse_args()

# NOTE(review): yaml.load without an explicit Loader can construct arbitrary
# Python objects; the path comes from the command line, so consider
# yaml.safe_load if the configuration only holds plain data.
with open(args.update_file, 'r') as f:
    params = yaml.load(f)

db_column = params['storage']['column']
file_column = params['source']['column']
schema = params['storage']['schema']

# Folder to use for I/O
folder = data_folder.for_file(args.update_file)
os.chdir(folder)
logger.info('Using {} for I/O operations'.format(folder))

# Step one: download file from remote server if user provided url
try:
    url = params['source']['url']
except KeyError:
    # params['source'] was already subscripted successfully above, so a
    # missing 'url' key is the only failure expected here.
    logger.info('URL was not present in the configuration file...')
else:
    logger.info('Downloading file...')
    data_file = urllib2.urlopen(url)
    try:
        # Download file, replacing it if it already exists
        with open(params['source']['filename'], 'wb') as output:
            output.write(data_file.read())
    finally:
        # Release the connection even if writing the file fails.
        data_file.close()

Exemple #6

0

Afficher le fichier

Fichier : update.py Projet : conorhenley/cincinnati

# Excerpt of an update script driven by a yaml configuration file: reads the
# configuration, switches to the matching data folder, and optionally
# downloads the source file when the configuration provides a URL.
logger = logging.getLogger()

# Step zero: read from yaml file
parser = argparse.ArgumentParser()
parser.add_argument("update_file",
                    help="Path to yaml file with configuration parameters")
args = parser.parse_args()

# NOTE(review): yaml.load without an explicit Loader can construct arbitrary
# Python objects; the path comes from the command line, so consider
# yaml.safe_load if the configuration only holds plain data.
with open(args.update_file, 'r') as f:
    params = yaml.load(f)

db_column = params['storage']['column']
file_column = params['source']['column']
schema = params['storage']['schema']

# Folder to use for I/O
folder = data_folder.for_file(args.update_file)
os.chdir(folder)
logger.info('Using {} for I/O operations'.format(folder))

# Step one: download file from remote server if user provided url
try:
    url = params['source']['url']
except KeyError:
    # params['source'] was already subscripted successfully above, so a
    # missing 'url' key is the only failure expected here.
    logger.info('URL was not present in the configuration file...')
else:
    logger.info('Downloading file...')
    data_file = urllib2.urlopen(url)
    try:
        # Download file, replacing it if it already exists
        with open(params['source']['filename'], 'wb') as output:
            output.write(data_file.read())
    finally:
        # Release the connection even if writing the file fails.
        data_file.close()