def create(is_print_elapsed: bool, _is_test: bool, default_directories: dict,
           default_table_columns_names: dict) -> None:
    """It works only because it is a school assignment."""
    data = DataImport('<SEP>', False, _is_test)
    data.create_engine(**{str(Elapsed.print_elapsed): is_print_elapsed})
    # Import every directory into its corresponding table
    for key, value in default_directories.items():
        data.address = value
        data.columns_name_list = default_table_columns_names[key]
        data.data_import(key, **{str(Elapsed.print_elapsed): is_print_elapsed})
    data.disconnect_engine()
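# Hedged usage sketch for create(): the directory paths and column lists below
# are hypothetical placeholders, not values taken from the assignment itself.
default_directories = {
    'tracks': 'data/tracks/',
    'triplets': 'data/triplets/',
}
default_table_columns_names = {
    'tracks': ['track_id', 'artist', 'title'],
    'triplets': ['user_id', 'track_id', 'play_count'],
}
create(is_print_elapsed=True, _is_test=False,
       default_directories=default_directories,
       default_table_columns_names=default_table_columns_names)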
def process_chunks(config, ipResolver, geoservice: GeoService, datareader: DataImport,
                   datawriter: DataExport) -> bool:
    """Triggers parallel processes.

    Resolves IPs in pandas dataframe chunks and stores the resolved data in S3.
    """
    # Initialize logging
    logger = utility.getlogger('ip_resolution', 'ip_resolution')
    seconds = time.time()
    try:
        query_panoply = config["panoplydatabase"]["readQuery"]
        for dataframe_ip_address in datareader.getbatch_pandas_dataframe(query_panoply):
            dataframes = utility.split_dataframe(dataframe_ip_address)
            processList = []
            # Spawn one worker process per dataframe chunk
            for processNo, frame in enumerate(dataframes, start=1):
                process_ipresolve = processes.Process(
                    target=ipResolver.resolve_ipaddress,
                    args=(frame, geoservice, datawriter, processNo))
                processList.append(process_ipresolve)
                process_ipresolve.start()
                logger.info('processNo-' + str(process_ipresolve.pid))
            # Wait for all workers in this batch to finish
            for p in processList:
                p.join()
    except Exception as ex:
        logger.error('Issue in fetching data from Panoply: ' + str(ex))
        logger.error(utility.print_exception())
        return False
    logger.info("Finished the batch job in %s seconds" % str((time.time() - seconds) // 1))
    return True
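# Self-contained sketch of the chunk-and-fork pattern used above, with the
# project-specific resolver, geoservice and S3 writer replaced by a stand-in
# worker function. Only pandas, numpy and the standard library are assumed.
import multiprocessing

import numpy as np
import pandas as pd


def _resolve_chunk(chunk: pd.DataFrame, process_no: int) -> None:
    # Stand-in for ipResolver.resolve_ipaddress: just report the chunk size.
    print(f"process {process_no}: resolving {len(chunk)} rows")


def resolve_in_parallel(df: pd.DataFrame, n_chunks: int = 4) -> None:
    workers = []
    # One process per chunk, numbered from 1 like processNo above
    for process_no, chunk in enumerate(np.array_split(df, n_chunks), start=1):
        p = multiprocessing.Process(target=_resolve_chunk, args=(chunk, process_no))
        workers.append(p)
        p.start()
    for p in workers:
        p.join()


if __name__ == "__main__":
    resolve_in_parallel(pd.DataFrame({"ip": ["10.0.0.%d" % i for i in range(100)]}))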
def print_info(is_print_elapsed: bool) -> None:
    """It works only because it is a school assignment."""
    most_popular_artist = 'SELECT artist, COUNT(artist) as performances ' \
                          'FROM tracks JOIN triplets ON tracks.track_id = triplets.track_id ' \
                          'GROUP BY artist ORDER BY performances DESC LIMIT 1'
    five_most_popular_songs = 'SELECT title, COUNT(title) as performances ' \
                              'FROM tracks JOIN triplets ON tracks.track_id = triplets.track_id ' \
                              'GROUP BY title ORDER BY performances DESC LIMIT 5'
    data = DataImport('')
    print('Most popular artist: ')
    print_result(
        data.execute(most_popular_artist,
                     **{str(Elapsed.print_elapsed): is_print_elapsed}),
        '{0}. {1} -- {2}')
    print('Five most popular songs: ')
    print_result(
        data.execute(five_most_popular_songs,
                     **{str(Elapsed.print_elapsed): is_print_elapsed}),
        '{0}. {1} -- {2}')
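# print_result is referenced above but not shown; a plausible sketch, assuming
# DataImport.execute returns an iterable of row tuples such as (artist, performances).
def print_result(rows, template: str) -> None:
    for index, row in enumerate(rows, start=1):
        print(template.format(index, *row))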
from data_import import DataImport
from build_coordinates import BuildCoordinates

a = DataImport("data/centerline.csv")
b = BuildCoordinates(a.get_xs(), a.get_ys(), a.get_zs())
b.plot('red', 'o')
def global_store_df():
    di = DataImport()
    df_deliveries, df_customers, df_colocations, df_levels = di.read_data_hdf()
    return df_deliveries, df_customers, df_colocations, df_levels
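# A minimal sketch of what a read_data_hdf implementation could look like, assuming
# the four dataframes are stored under separate keys in a single HDF5 file.
# The store path and key names below are hypothetical, not taken from the project.
import pandas as pd


class DataImportSketch:
    def __init__(self, store_path: str = "data/store.h5"):
        self.store_path = store_path

    def read_data_hdf(self):
        keys = ["deliveries", "customers", "colocations", "levels"]
        # pandas.read_hdf requires the optional PyTables ('tables') dependency
        return tuple(pd.read_hdf(self.store_path, key=k) for k in keys)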
SOFTMAX_SIZE = 128
OUTPUT_SIZE = 2
BATCH_REDUCE_ITERATION = 50
BATCH_REDUCE_STEP = 4
ACCURACY_CACHE_SIZE = 5
STOPPING_THRESHOLD = 0
REGENERATE_CHUNKS = True

# Generate folder for tensorboard summary files
summary_save_dir = (os.getcwd() + "/summaries/" + FRAMES_FOLDER + "_" + DISTANCE_DATA
                    + "_lr" + str(LEARNING_RATE) + "_t" + str(THRESHOLD)
                    + "_bs" + str(BATCH_SIZE) + "_ts" + str(TIME_STEPS)
                    + "_p" + str(SEQUENCE_SPACING) + "_cs" + str(CELL_SIZE)
                    + "x" + str(CELL_LAYERS) + "x" + str(HIDDEN_SIZE))
os.mkdir(summary_save_dir)

# Create data importer object
DI = DataImport(FRAMES_FOLDER, SEQUENCE_SPACING, DISTANCE_DATA, THRESHOLD,
                BATCH_SIZE, TIME_STEPS, channels=IMAGE_CHANNELS, image_size=IMAGE_WIDTH)

# Generate chunks for more efficient loading
if REGENERATE_CHUNKS:
    os.mkdir(os.getcwd() + "/chunks")
    dataFolders = [path for path in glob.glob(os.getcwd() + "/*")
                   if os.path.isdir(path) and "chunks" not in path and "summaries" not in path]
    for path in dataFolders:
        DI.import_folder(path)

# Helper functions
def load_batch(sess, coord, op):
    batch_count = 0
    batch_size = BATCH_SIZE
import os

from db.db import Database
from data_import import ReadYAML, CreateDatabase, DataImport, FileHelper
from data_preparation import PrepareDatabase, PrepareLayers
from data_preparation import Profiles

CreateDatabase(ReadYAML()).create_pgpass_files()

db_conn = Database()
data_import = DataImport(ReadYAML(), False, db_conn)
prepare_db = PrepareDatabase(ReadYAML(), False, db_conn)
prepare_layers = PrepareLayers(ReadYAML(), False, prepare_db, db_conn)

#data_import.load_js_lib()
#data_import.import_osm2pgrouting()
#prepare_layers.ways()
#prepare_layers.walkability()

slope_profiles = Profiles(db_suffix='', ways_table='footpath_visualization', filter_ways='')
slope_profiles.compute_average_slope()