コード例 #1
0
ファイル: database_manager.py プロジェクト: xsiir/etlproject
 def getConnectionDetails(self):
     return constants.CONNECTION_PROPERTIES.format(
         get_property('db_config', 'driver'),
         get_property('db_config', 'server_name'),
         get_property('db_config', 'database_name'),
         get_property('db_config', 'username'),
         get_property('db_config', 'password'))
コード例 #2
0
ファイル: database_manager.py プロジェクト: xsiir/etlproject
    def __initialize_playlist_history_database(self):
        user_id = get_property(constants.TABLES_DETAILS_DOMAIN,
                               constants.COVER_ID_COLUMN_NAME_KEY)
        song_id = get_property(constants.TABLES_DETAILS_DOMAIN,
                               constants.COVER_SONG_ID_COLUMN_NAME_KEY)
        play_date = get_property(
            constants.TABLES_DETAILS_DOMAIN,
            constants.PLAYLIST_HISTORY_DATE_COLUMN_NAME_KEY)

        table_name = get_property(constants.TABLES_DETAILS_DOMAIN,
                                  constants.PLAYLIST_HISTORY_TABLE_NAME_KEY)

        self.__execute_create_table_query([user_id, song_id, play_date],
                                          table_name)
コード例 #3
0
ファイル: database_manager.py プロジェクト: xsiir/etlproject
    def __initialize_covers_database(self):
        cover_id = get_property(constants.TABLES_DETAILS_DOMAIN,
                                constants.COVER_ID_COLUMN_NAME_KEY)
        cover_song_id = get_property(constants.TABLES_DETAILS_DOMAIN,
                                     constants.COVER_SONG_ID_COLUMN_NAME_KEY)
        artist_name = get_property(constants.TABLES_DETAILS_DOMAIN,
                                   constants.COVER_ARTIST_NAME_COLUMN_NAME_KEY)
        title = get_property(constants.TABLES_DETAILS_DOMAIN,
                             constants.COVER_TITLE_NAME_COLUMN_NAME_KEY)

        table_name = get_property(constants.TABLES_DETAILS_DOMAIN,
                                  constants.COVER_TABLE_NAME_KEY)

        self.__execute_create_table_query(
            [cover_id, cover_song_id, artist_name, title], table_name)
コード例 #4
0
ファイル: song_dao.py プロジェクト: xsiir/etlproject
 def find_most_popular_artists(self, count):
     artist_column_name = get_property(
         constants.TABLES_DETAILS_DOMAIN,
         constants.COVER_ARTIST_NAME_COLUMN_NAME_KEY),
     result = self.__database_manager.select(
         self.__find_most_popular(count, artist_column_name))
     return result
コード例 #5
0
class FileLoader:
    __BATCH_SIZE = int(get_property(constants.APPLICATION_PROPERTIES_DOMAIN, constants.BATCH_SIZE_KEY))

    def __init__(self, encoding_format, column_separator):
        self.__database_manager = DatabaseManager()
        self.__item_processed = 0
        self.__inMemoryRecords = []
        self.__encoding_format = encoding_format
        self.__column_separator = column_separator

    @time_counter
    def to_database_from_file(self, file_path, table, columns):

        print("Zaczynam wczytywanie...")
        with open(file_path, encoding=self.__encoding_format) as fileContent:
            for line in fileContent:
                if self.__if_use_batch() and self.__if_records_in_memory_equals_to_batch_size():
                    self.__insert_items_to_database(table, columns)
                    self.__clear_precessed_items()
                self.__inMemoryRecords.append(self.__getColumnsPerRecord(line))
                self.__item_processed = self.__item_processed + 1
        self.__insert_items_to_database(table, columns)
        self.__clear_precessed_items()

        print("Koncze wczytywanie...")
        pass

    def __get_prepared_insert_statement(self, table, columns):

        values_gaps = ""
        for i in range(columns):
            if i == columns - 1:
                values_gaps += "?"
            else:
                values_gaps += "?,"

        statement = constants.INSERT_INTO.format(table, values_gaps)
        return statement

    def __if_use_batch(self):
        return bool(get_property(constants.APPLICATION_PROPERTIES_DOMAIN, constants.USE_BATCH))

    def __if_records_in_memory_equals_to_batch_size(self):
        return self.__item_processed == FileLoader.__BATCH_SIZE

    def __insert_items_to_database(self, table, columns):
        prepared_insert_statement = self.__get_prepared_insert_statement(table, columns)
        self.__database_manager.executeMany(prepared_insert_statement, self.__inMemoryRecords)
        self.__database_manager.commit()

    def __clear_precessed_items(self):
        self.__inMemoryRecords = []
        self.__item_processed = 0
        pass

    def __getColumnsPerRecord(self, textLine):
        # Remove \n from the end of line
        line_without_end_line_sign = textLine.rstrip('\n')
        return line_without_end_line_sign.split(self.__column_separator)
コード例 #6
0
ファイル: database_manager.py プロジェクト: xsiir/etlproject
    def __execute_create_table_query(self, column_names, table_name):

        columns = []
        for column_name in column_names:
            columns.append(create_standard_varchar_column(column_name))

        merged_columns = create_columns(columns)
        database_name = get_property(constants.DATABASE_CONFIG_DOMAIN,
                                     constants.DATABASE_SERVER_NAME)
        database_schema = get_property(constants.DATABASE_CONFIG_DOMAIN,
                                       constants.DATABASE_SCHEMA)
        table_origin = '{}.{}'.format(database_name, database_schema)

        query = constants.CREATE_TABLE.format(table_origin, table_name,
                                              merged_columns)
        print(query)
        self.__crsr.execute(query)
コード例 #7
0
ファイル: main.py プロジェクト: xsiir/etlproject
def main():
    """
        Pomysł na wykonanie projektu:
            1) Niech istnieje klasa (FilelLoader) będąca odpowiedzialna za różne opracje na plikach.
                - u nas będzie to wczytanie danych z pliku .txt i zapis tychże danych do bazy.
            2) Niech cała aplikacja korzysta tylko z jednego połączenia do bazy danych (DatabaseManager).
                Niech klasa to obslugujaca bedzie singletonem. Kazda operacja na bazie danych powinna przechodzic przez ta klase.
            3) Wszystkie bardziej zlozone polecenia SQL tudziez inne bloki tekstowe niech beda sie znajdowac w jednym miejscu.
                - plik ten nazwalem 'constants'
            4) Wszystkie dane konfiguracyjne niech beda trzymane w pliku properties a dostep do nich bedzie mozliwy poprzez
                jedno narzedzie (tu: metoda) dla calej aplikacji


        Wczytywanie danych z pliku do bazy danych trwa ~11:47
    """

    print("Prace przygotowal: Sienkiewicz Maciej")
    encoding_format = get_property(constants.FILE_CONFIG_DOMAIN,
                                   constants.ENCODING_FORMAT_KEY)
    field_separator = get_property(constants.FILE_CONFIG_DOMAIN,
                                   constants.FIELD_SEPARATOR_KEY)
    file_loader = FileLoader(encoding_format, field_separator)

    print("Zaladujmy pierwszy plik do bazy...")
    first_file_path = get_property(constants.FILE_PATHS_DOMAIN,
                                   constants.UNIQUE_TRACKS_PATH)
    file_loader.to_database_from_file(first_file_path, "model.dbo.Covers", 4)
    print("Ladowanie skonczone...")

    print("Zaladujmy drugi plik do bazy...")
    second_file_path = get_property(constants.FILE_PATHS_DOMAIN,
                                    constants.PLAYLIST_HISTORY_PATH)
    file_loader.to_database_from_file(second_file_path,
                                      "model.dbo.PlaylistHistory", 3)
    print("Ladowanie skonczone...")

    song_dao = SongDAO()
    for row in song_dao.find_most_popular_artists('5'):
        print('Utworty artysty: {} zostaly odtworzone {} razy'.format(
            row[0], row[1]))

    for row in song_dao.find_most_popular_tiles('5'):
        print('Utwor {} zostal odtworzone {} razy'.format(row[0], row[1]))
コード例 #8
0
ファイル: song_dao.py プロジェクト: xsiir/etlproject
    def __find_most_popular(self, count, searched_column_name):
        database_name = get_property(constants.DATABASE_CONFIG_DOMAIN,
                                     constants.DATABASE_SERVER_NAME)
        database_schema = get_property(constants.DATABASE_CONFIG_DOMAIN,
                                       constants.DATABASE_SCHEMA)
        table_origin = '{}.{}'.format(database_name, database_schema)

        query = constants.SELECT_MOST_POPULAR_BAND.format(
            count, table_origin, searched_column_name[0],
            get_property(constants.TABLES_DETAILS_DOMAIN,
                         constants.COVER_TABLE_NAME_KEY),
            get_property(constants.TABLES_DETAILS_DOMAIN,
                         constants.PLAYLIST_HISTORY_TABLE_NAME_KEY),
            get_property(constants.TABLES_DETAILS_DOMAIN,
                         constants.COVER_SONG_ID_COLUMN_NAME_KEY),
            get_property(constants.TABLES_DETAILS_DOMAIN,
                         constants.PLAYLIST_HISTORY_SONG_ID_COLUMN_NAME_KEY))
        return query
コード例 #9
0
 def __if_use_batch(self):
     return bool(get_property(constants.APPLICATION_PROPERTIES_DOMAIN, constants.USE_BATCH))
コード例 #10
0
ファイル: song_dao.py プロジェクト: xsiir/etlproject
 def find_most_popular_tiles(self, count):
     title = get_property(constants.TABLES_DETAILS_DOMAIN,
                          constants.COVER_TITLE_NAME_COLUMN_NAME_KEY),
     result = self.__database_manager.select(
         self.__find_most_popular(count, title))
     return result