def __init__(self):
    """Extract every CSV into a DataFrame, persist each one to parquet,
    then reload all saved parquet files and concatenate them into a
    single DataFrame (used downstream to find the highest temperature).
    """
    self.__data = Extractor().extract_csv_data()
    self.__loader = Loader()
    # Persist every extracted DataFrame to its own parquet file.
    for name, frame in self.__data.items():
        self.__loader.save_to_parquet(name, frame)
    # Read back everything that was just written.
    frames = self.__loader.read_parquets("weather")
    # One combined DataFrame over all records.
    self.__df = pd.concat(frames, ignore_index=True)
# NOTE(review): collapsed chunk. It begins with the tail of an unseen
# method ("return self.data.to_json(**params)") whose header lies before
# this view, followed by a __main__ driver that fetches a CDC BRFSS HTML
# table via Loader (url/header/attrs params), then builds a Transformer
# seeded with './states.json' and flips its dictionary to obtain state
# names. `colName` ('State:') appears unused here -- presumably consumed
# later in the file; confirm before removing. Because the chunk starts
# inside an incomplete definition, the code is left byte-identical
# rather than reformatted.
return self.data.to_json(**params) if __name__ == "__main__": from load import Loader # Load the data url = 'http://apps.nccd.cdc.gov/brfss/list.' \ 'asp?cat=HI&yr=2013&qkey=8671&state=All' attrs = {'border': 1, 'cellpadding': 5, 'cellspacing': 0} params = { 'url': url, 'header': 0, 'attrs': attrs } load = Loader() load.setParams(params) data = load.loadHtml().data # Transform the data jsonFp = './states.json' colName = 'State:' # initiate transformer object transform = Transformer(data) transform.setTransformDictionary(jsonFp) # get long values of the states statesDict = transform.flipTransformDictionary() states = statesDict.keys()
# Unpack the queried record sets from the raw query-result list
# (target_dict_list / opps / service_orders / quotes / full_soql_query_mode
# are defined earlier in this file).
cor_forms = target_dict_list[6]['records']
cap_projects = target_dict_list[3]['records']
expense_builders = target_dict_list[4]['records']
npv_tasks = target_dict_list[1]['records']
print(
    'All data successfully queried. Any errors after this point are due to DATA VALIDATION ONLY.'
)

# Run the validation pipeline stage by stage; each stage consumes the
# output of the previous one.
t = Transformer(opps, service_orders, quotes, cor_forms, cap_projects,
                expense_builders)
valid_opp_to_service_orders = t.validate_opp_to_service_order()
valid_opp_to_quote_or_cor_form = t.validate_opp_to_quote_or_cor_form(
    valid_opp_to_service_orders)
standardized_opp_to_cp_or_eb = t.standardize_opp_to_cp_or_eb(
    valid_opp_to_quote_or_cor_form)
valid_opp_to_cp_or_eb = t.validate_opp_to_cp_or_eb(
    valid_opp_to_quote_or_cor_form, standardized_opp_to_cp_or_eb)

# All validation stages passed: close the applicable NPV tasks, but only
# when running in full SOQL query mode.
if full_soql_query_mode:
    l = Loader(valid_opp_to_cp_or_eb, npv_tasks)
    tasks_closed = l.load_tasks()
else:
    l = None
    tasks_closed = []

if not tasks_closed:
    print('0 NPV tasks validated by automation.')
else:
    print(
        '{} NPV tasks validated by automation. Please move to Stage 5 via the corresponding report queue: \n \
https://zayo.my.salesforce.com/00O0z000005btK4'.format(len(tasks_closed)))
# NOTE(review): collapsed chunk from a STOQS test-database loader. It
# imports the CCE 2015 campaign loader, declares an empty Campaigns
# holder class, configures a Loader through an argparse-style Namespace
# (Namespace itself is presumably imported earlier in the file --
# confirm), creates the test db with create_only=True, and then narrows
# the campaign to a single lores event window. The final assignment is
# truncated mid-expression ("...lores_event_times[") -- its remainder
# lies outside this chunk, so the code is left byte-identical rather
# than reformatted.
from load import Loader from CCE.loadCCE_2015 import CCE_2015_Campaign from DAPloaders import runGliderLoader, runLrauvLoader class Campaigns(): pass # Reuse CCELoader and Loader code to create our test db and load a # small amount of data for testing of the loading code db_alias = 'stoqs' campaign_name = 'Loading test database' campaign_description = 'Test database for all kinds of data: EPIC from CCE, Glider, LRAUV, etc.' campaign = CCE_2015_Campaign(db_alias, campaign_name) loader = Loader() campaigns = Campaigns() loader.args = Namespace() loader.args.test = False loader.args.clobber = True loader.args.db = db_alias loader.args.drop_indexes = False campaigns.campaigns = {db_alias: 'CCE/loadCCE_2015.py'} loader.load(campaigns, create_only=True) # Load only the March 2016 event lores Mooring data for ms2 campaign.hires_event_times = [] campaign.lores_event_times = [campaign.lores_event_times[1]] campaign.cl.ccems2_start_datetime, campaign.cl.ccems2_end_datetime = campaign.lores_event_times[
def setUp(self):
    """Test fixture: build a fresh Loader and remove any parquet files
    left in resources/ by a previous run so each test starts clean."""
    self.loader = Loader()
    self.project_dir = os.path.abspath(__file__ + "/../../")
    leftovers = glob.glob(self.project_dir + "/resources/*.parquet.gzip")
    for leftover in leftovers:
        os.remove(leftover)
#!/usr/bin/env python3
from fetch import Fetcher
from load import Loader

if __name__ == '__main__':
    # Runtime configuration for the price fetcher.
    options = {
        # Prices to fetch list
        'data_file': "fetch_list.txt",
        # Price data file
        'file_name': "watch_prices.txt",
        # Data directory
        'output_directory': "./",
    }

    # Load the fetch list and hand everything to the fetcher.
    loader = Loader()
    options['data'] = loader.load_data(options['data_file'])

    fetcher = Fetcher(options)
    fetcher.run()