Code Example #1
File: main.py  Project: beoArcha/Python_lab01_ETL
def create(is_print_elapsed: bool, _is_test: bool, default_directories: dict,
           default_table_columns_names: dict) -> None:
    """It works only because it's a school assignment."""
    data = DataImport('<SEP>', False, _is_test)
    data.create_engine(**{str(Elapsed.print_elapsed): is_print_elapsed})
    # Import each directory into its table, using the per-table column names.
    for key, value in default_directories.items():
        data.address = value
        data.columns_name_list = default_table_columns_names[key]
        data.data_import(key, **{str(Elapsed.print_elapsed): is_print_elapsed})
    data.disconnect_engine()
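A minimal invocation sketch; the directory paths and column lists below are illustrative assumptions (loosely modeled on the tables queried in Code Example #3), not taken from the project:

# Hypothetical call site; paths and column names are assumptions.
default_directories = {'tracks': 'data/tracks/', 'triplets': 'data/triplets/'}
default_table_columns_names = {
    'tracks': ['track_id', 'title', 'artist'],
    'triplets': ['user_id', 'track_id', 'play_count'],
}
create(is_print_elapsed=True, _is_test=False,
       default_directories=default_directories,
       default_table_columns_names=default_table_columns_names)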
Code Example #2
def process_chunks(config, ipResolver, geoservice: GeoService,
                   datareader: DataImport, datawriter: DataExport) -> bool:
    """Triggers parallel processes.

    Resolves IPs in pandas dataframe chunks and stores the resolved data in S3.
    """
    # Initialize logging
    logger = utility.getlogger('ip_resolution', 'ip_resolution')
    seconds = time.time()
    try:
        query_panoply = config["panoplydatabase"]["readQuery"]
        for dataframe_ip_address in datareader.getbatch_pandas_dataframe(
                query_panoply):
            dataframes = utility.split_dataframe(dataframe_ip_address)
            processList = []
            # Spawn one worker process per dataframe chunk.
            for processNo, frame in enumerate(dataframes, start=1):
                process_ipresolve = processes.Process(
                    target=ipResolver.resolve_ipaddress,
                    args=(frame, geoservice, datawriter, processNo))
                processList.append(process_ipresolve)
                process_ipresolve.start()
                logger.info('process %d started with pid %s'
                            % (processNo, process_ipresolve.pid))
            # Wait for every worker in this batch to finish.
            for p in processList:
                p.join()
    except Exception as ex:
        logger.error('Issue in fetching data from Panoply: ' + str(ex))
        logger.error(utility.print_exception())
        return False
    logger.info("Finished the batch job in %s seconds" % str(
        int(time.time() - seconds)))
    return True
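utility.split_dataframe is not shown in this example; a plausible sketch, assuming it evenly partitions the frame for the worker processes (the function body and the default chunk count are assumptions):

import numpy as np
import pandas as pd

def split_dataframe(df: pd.DataFrame, n_chunks: int = 4) -> list:
    # Partition the frame into roughly equal chunks, one per worker.
    # np.array_split keeps each piece as a pandas DataFrame.
    return np.array_split(df, n_chunks)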
Code Example #3
File: main.py  Project: beoArcha/Python_lab01_ETL
def print_info(is_print_elapsed: bool) -> None:
    """It's works only because its school assignment"""
    most_popular_artist = 'SELECT artist, COUNT(artist) as performances ' \
                          'FROM tracks JOIN triplets ON tracks.track_id = triplets.track_id ' \
                          'GROUP BY artist ORDER BY performances DESC LIMIT 1'
    five_most_popular_songs = 'SELECT title, COUNT(title) as performances ' \
                              'FROM tracks JOIN triplets ON tracks.track_id = triplets.track_id ' \
                              'GROUP BY title ORDER BY performances DESC LIMIT 5'
    data = DataImport('')
    print('Most popular artist: ')
    print_result(
        data.execute(most_popular_artist,
                     **{str(Elapsed.print_elapsed): is_print_elapsed}),
        '{0}. {1} -- {2}')
    print('Five most popular songs: ')
    print_result(
        data.execute(five_most_popular_songs,
                     **{str(Elapsed.print_elapsed): is_print_elapsed}),
        '{0}. {1} -- {2}')
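print_result is defined elsewhere in the project; a minimal sketch consistent with the '{0}. {1} -- {2}' format string (the row shape is an assumption):

def print_result(rows, row_format: str) -> None:
    # Print each result row with a 1-based index,
    # e.g. '{0}. {1} -- {2}' -> '1. <artist> -- <count>'.
    for index, row in enumerate(rows, start=1):
        print(row_format.format(index, *row))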
Code Example #4
from data_import import DataImport
from build_coordinates import BuildCoordinates

# Load the centerline points and plot them as red circle markers.
a = DataImport("data/centerline.csv")
b = BuildCoordinates(a.get_xs(), a.get_ys(), a.get_zs())
b.plot('red', 'o')
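BuildCoordinates is project code that is not shown here; a minimal 3D-scatter sketch of what plot('red', 'o') might do, assuming matplotlib (the whole implementation is an assumption):

import matplotlib.pyplot as plt

class BuildCoordinates:
    # Hypothetical reconstruction; the real class may differ.
    def __init__(self, xs, ys, zs):
        self.xs, self.ys, self.zs = xs, ys, zs

    def plot(self, color, marker):
        # Scatter the (x, y, z) points in 3D with the given style.
        ax = plt.figure().add_subplot(projection='3d')
        ax.scatter(self.xs, self.ys, self.zs, c=color, marker=marker)
        plt.show()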
Code Example #5
File: linde_gui.py  Project: AIENGINE/Data_Analysis
def global_store_df():
    di = DataImport()
    df_deliveries, df_customers, df_colocations, df_levels = di.read_data_hdf()
    return df_deliveries, df_customers, df_colocations, df_levels
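read_data_hdf belongs to the project's DataImport class; a sketch assuming it reads the four frames from a pandas HDFStore (the file name and store keys are assumptions):

import pandas as pd

class DataImport:
    # Hypothetical sketch; the HDF5 path and keys are assumptions.
    def read_data_hdf(self, path='linde_data.h5'):
        with pd.HDFStore(path, mode='r') as store:
            return (store['deliveries'], store['customers'],
                    store['colocations'], store['levels'])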
Code Example #6
SOFTMAX_SIZE = 128
OUTPUT_SIZE = 2

BATCH_REDUCE_ITERATION = 50
BATCH_REDUCE_STEP = 4
ACCURACY_CACHE_SIZE = 5
STOPPING_THRESHOLD = 0

REGENERATE_CHUNKS = True

# Generate folder for tensorboard summary files
summary_save_dir = (
    f"{os.getcwd()}/summaries/{FRAMES_FOLDER}_{DISTANCE_DATA}"
    f"_lr{LEARNING_RATE}_t{THRESHOLD}_bs{BATCH_SIZE}_ts{TIME_STEPS}"
    f"_p{SEQUENCE_SPACING}_cs{CELL_SIZE}x{CELL_LAYERS}x{HIDDEN_SIZE}")
os.mkdir(summary_save_dir)

# Create data importer object
DI = DataImport(FRAMES_FOLDER, SEQUENCE_SPACING, DISTANCE_DATA, THRESHOLD, BATCH_SIZE, TIME_STEPS, channels=IMAGE_CHANNELS, image_size=IMAGE_WIDTH)

# Generate chunks for more efficient loading
if REGENERATE_CHUNKS:
    os.mkdir(os.getcwd() + "/chunks")

    # Collect every data folder, skipping the chunk and summary directories.
    dataFolders = [path for path in glob.glob(os.getcwd() + "/*") if
                   os.path.isdir(path) and "chunks" not in path and "summaries" not in path]
    for path in dataFolders:
        DI.import_folder(path)


# Helper functions
def load_batch(sess, coord, op):
    batch_count = 0
    batch_size = BATCH_SIZE
Code Example #7
import os
from db.db import Database
from data_import import ReadYAML, CreateDatabase, DataImport, FileHelper
from data_preparation import PrepareDatabase, PrepareLayers, Profiles

CreateDatabase(ReadYAML()).create_pgpass_files()
db_conn = Database()
data_import = DataImport(ReadYAML(), False, db_conn)
prepare_db = PrepareDatabase(ReadYAML(), False, db_conn)
prepare_layers = PrepareLayers(ReadYAML(), False, prepare_db, db_conn)

#data_import.load_js_lib()
#data_import.import_osm2pgrouting()
#prepare_layers.ways()
#prepare_layers.walkability()

slope_profiles = Profiles(db_suffix='',
                          ways_table='footpath_visualization',
                          filter_ways='')
slope_profiles.compute_average_slope()