Esempio n. 1
0
    def insert(self, table, dataframe, **args):
        """Insert *dataframe* rows into *table*, updating existing rows on
        duplicate key (MySQL ``ON DUPLICATE KEY UPDATE``).

        Key columns (ReportDate, Code, Date, ValuationMethod) are excluded
        from the update clause so they are never overwritten.

        Keyword Args:
            filename: name of the source file, used in error logging.
        """
        # Build the UPDATE clause for every non-key column; spaces are
        # stripped so column names stay valid SQL identifiers.
        key_columns = {'ReportDate', 'Code', 'Date', 'ValuationMethod'}
        onDupUpdateKey = [
            '{0}=VALUES({0})'.format(c.replace(' ', ''))
            for c in dataframe.columns if c not in key_columns
        ]

        sql_insert = 'INSERT INTO %s(%s) VALUES(%s) %s' % (
            table,
            ','.join(dataframe.columns),
            ','.join(['%s'] * len(dataframe.columns)),
            'ON DUPLICATE KEY UPDATE ' + ','.join(onDupUpdateKey),
        )

        # BUG FIX: each cursor() call returns a NEW cursor, so the original
        # closed a fresh cursor in ``finally`` while the one actually used
        # by executemany was never closed. Keep one cursor and close it.
        cursor = self.__sqlConnect.cursor()
        try:
            rows = [tuple(r) for r in dataframe.values]
            cursor.executemany(sql_insert, rows)
            # NB : you won't get an IntegrityError when reading
            self.__sqlConnect.commit()
        except mysql.connector.Error as err:
            # args.get() avoids a KeyError masking the real DB error when
            # the caller did not supply ``filename``.
            LogHandler.log_exceptions("""
                    Parsing file {}\nSQL Query: {}\nSomething went wrong: {}\n
            """.format(args.get('filename'), sql_insert, err))
        finally:
            cursor.close()
Esempio n. 2
0
 def saveDataFrame(self, dataframe, table):
     """Append *dataframe* to *table* through the SQLAlchemy engine.

     Column dtypes come from the connection's per-table dtype map
     (``getDbType()[table]``). DB-API failures are logged and swallowed,
     making the write deliberately best-effort.
     """
     try:
         dataframe.to_sql(name=table,
                          con=self.__con.getEngine(),
                          index=False,
                          if_exists='append',
                          dtype=self.__con.getDbType()[table])
     except DBAPIError as e:
         # Deliberate best-effort: log and continue. (Removed the dead
         # trailing ``pass`` and the no-op ``finally: pass``.)
         LogHandler.log_exceptions("Sql Exceptions: %s\r\n" % e)
Esempio n. 3
0
 def __init__(self):
     """Build the SQLAlchemy engine from the module-level ``dbConfig``."""
     self.__db_name = dbConfig['database']
     LogHandler.log_msg("DB engine initializing...")
     try:
         # BUG FIX: the original nested create_engine(create_engine(url),
         # echo=False), passing an Engine object where create_engine
         # expects a URL string. A single call with the URL is correct.
         self.engine = create_engine(
             'mysql://%s:%s@%s:%s/%s' % (
                 dbConfig['user'],
                 dbConfig['password'],
                 dbConfig['host'],
                 dbConfig['port'],
                 dbConfig['database'],
             ),
             echo=False)
     except DBAPIError as err:
         LogHandler.log_exceptions(err)
     finally:
         LogHandler.log_msg("Done.")
Esempio n. 4
0
    def engine_insert_update(self, dataframe, table, **args):
        """Insert *dataframe* rows into *table* via the SQLAlchemy engine,
        updating existing rows on duplicate key (MySQL
        ``ON DUPLICATE KEY UPDATE``).

        Key columns (ReportDate, Code, Date, ValuationMethod) are excluded
        from the update clause so they are never overwritten.

        Keyword Args:
            filename: name of the source file, used in error logging.
        """
        # Spaces are stripped so column names remain valid SQL identifiers.
        key_columns = {'ReportDate', 'Code', 'Date', 'ValuationMethod'}
        onDupUpdateKey = [
            '{0}=VALUES({0})'.format(c.replace(' ', ''))
            for c in dataframe.columns if c not in key_columns
        ]

        sql_insert = 'INSERT INTO %s(%s) VALUES(%s) %s' % (
            table,
            ','.join(dataframe.columns),
            ','.join(['%s'] * len(dataframe.columns)),
            'ON DUPLICATE KEY UPDATE ' + ','.join(onDupUpdateKey),
        )

        try:
            with self.__engine.connect() as con:
                rows = [tuple(x) for x in dataframe.values]
                con.execute(sql_insert, *rows)
        except DBAPIError as err:
            # BUG FIX: DataError and OperationalError are subclasses of
            # DBAPIError in SQLAlchemy, so the original's separate handlers
            # for them (and the trailing ``pass``) were unreachable dead
            # code; this single handler covers all three.
            LogHandler.log_exceptions("""
                            Parsing file {}\nSQL Query: {}\nSomething went wrong: {}\n
                    """.format(args.get('filename'), sql_insert, err))
Esempio n. 5
0
# Number of worker threads the CSV file list is split across.
thread_num = 4

file_nums = 0
threaded_file = []

if __name__ == '__main__':
    # perf_counter_ns() measures wall-clock time. The original used
    # process_time_ns(), which counts CPU time only and under-reports
    # time the threads spend blocked on I/O.
    start_time = time.perf_counter_ns()
    for root, directories, files in os.walk("csv"):
        all_files = len(files)
        # Guard against a zero chunk size (fewer files than threads),
        # which would make range(..., step=0) raise ValueError.
        sublist_size = max(1, round(len(files) / thread_num))
        threaded_file = [
            files[i:i + sublist_size]
            for i in range(0, len(files), sublist_size)
        ]
        threads = []
        for index, tf in enumerate(threaded_file):
            p = StreamThread(sublist=tf, threadCode="Thread-{}".format(index))
            threads.append(p)

        # BUG FIX: the original start()ed and immediately join()ed each
        # thread inside the same loop iteration, running them one at a
        # time. Start every thread first, then join only the ones that
        # started, so the work actually runs in parallel.
        started = []
        for th in threads:
            try:
                th.start()
                started.append(th)
            except Error as err:
                LogHandler.log_exceptions(
                    "Error: unable to start thread, msg: {0}".format(err))
        for th in started:
            th.join()

    end_time = time.perf_counter_ns()
    print("All threads time spend: %sms" % round(
        (end_time - start_time) / 1000000, 5))