Example #1
0
class ImportExcel:
    """Import every Excel/CSV file from a user-chosen directory into MySQL.

    Driven through easygui prompts: ask for a database name and a source
    directory, (re)create the database, then load each CSV file / workbook
    sheet into its own table.  Failures are appended to ``log.txt`` inside
    the source directory.  Relies on project helpers ``HandleConfig``
    (config.ini reader) and ``ConnDB`` (pymysql connection factory).
    """

    def __init__(self):
        # Directory of the running script; reference files configured in
        # 'config.ini' are resolved relative to it.
        self.realpath = os.path.split(os.path.realpath(sys.argv[0]))[0]
        # get configuration from 'config.ini'
        self.HandleConfig = HandleConfig()
        self.ConnDB = ConnDB()
        self.conn = self.ConnDB.conndb()
        # Image shown on the final "Import Over" message box.
        self.img = self.realpath + "\\" + self.HandleConfig.handle_config(
            "g", "referencefile", "img")
        # SQL script run by clean_data() after a successful import.
        self.cleansql = self.realpath + "\\" + self.HandleConfig.handle_config(
            "g", "referencefile", "cleanexcel")
        # Default directory offered by the folder-picker dialog.
        self.default_dir = self.HandleConfig.handle_config(
            "g", "global", "default_excels_dictionary")

    def main(self):
        """Run the whole import workflow; returns None on user cancel."""
        # enter a database you will import excel to
        self.db = easygui.enterbox(msg="Enter your database name:")
        if not self.db:
            return
        self.db = self.db.lower().strip()

        # choose excel file dictionary
        self.importexcelpath = easygui.diropenbox(
            msg="Choose your excels dictionary:", default=self.default_dir)
        if not self.importexcelpath:
            return

        # Map of file name -> sanitized table name.
        excelcsvs = self.get_excel()
        if not excelcsvs:
            easygui.msgbox("No excels can import!")
            return

        # Any file/sheet that fails to import is recorded in this log.
        log_file = self.importexcelpath + "\\log.txt"
        if os.path.isfile(log_file):
            os.remove(log_file)

        # create database
        try:
            sql = "create database `{0}`;".format(self.db)
            self.ConnDB.exec(self.conn, sql)

        except pymysql.err.ProgrammingError:
            # Database already exists -- offer to drop and recreate it.
            conti = easygui.ynbox(
                msg="Database {0} exists, drop it first?".format(self.db))
            if conti:
                print("Dropping database...")
                sql = "drop database if exists `{0}`;create database `{0}`".format(
                    self.db, self.db)
                self.ConnDB.exec(self.conn, sql)

        print("Created database {}".format(self.db))
        # Second connection bound to the target database.
        self.conn_db = self.ConnDB.conndb(self.db)

        longexcelcsvs = defaultdict()  # files whose table name was truncated
        long_num = 0  # counter used to keep truncated names unique
        num = 0  # sheets/files attempted
        num_s = 0  # sheets/files imported successfully

        print("Begin to import...\n")
        for excelcsv, origin_tablename in excelcsvs.items():
            self.excel_name = excelcsv
            try:
                isexcel = 0
                # get csv dataset
                if re.fullmatch(r"^.*?\.csv$", excelcsv, flags=re.IGNORECASE):
                    datasets = defaultdict()
                    csv = self.importexcelpath + "\\" + excelcsv
                    # Sniff the encoding from (at most) the first 1 MB.
                    # NOTE(review): 'bytes' shadows the builtin of that name.
                    with open(csv, 'rb') as f:
                        bytes = f.read(1000000)
                    encode = chardet.detect(bytes)['encoding']
                    if encode == 'ascii':
                        encode = 'ansi'  # ansi is a super charset of ascii
                    dataset = pd.read_csv(csv,
                                          encoding=encode,
                                          dtype=str,
                                          na_filter=False,
                                          header=0,
                                          engine="c")
                    datasets['sheet1'] = dataset

                # get excel dataset(include sheets)
                if re.fullmatch(r"^.*?\.xlsx?$", excelcsv,
                                flags=re.IGNORECASE):
                    isexcel = 1
                    excel = self.importexcelpath + "\\" + excelcsv
                    # sheet_name=None loads every sheet into a dict.
                    datasets = pd.read_excel(excel,
                                             dtype=str,
                                             na_filter=False,
                                             header=0,
                                             sheet_name=None)

                # one sheet/csv is one table
                for k, v in datasets.items():
                    created_table = None
                    try:
                        sheet_name = k
                        dataset = v
                        tablename = origin_tablename
                        self.excel_name = excelcsv
                        # rename table name if excel have more than one sheets
                        if isexcel == 1 and len(datasets) > 1:
                            tablename = origin_tablename + '_' + re.sub(
                                r"[^\w]+",
                                "_",
                                sheet_name,
                                flags=re.IGNORECASE)
                            self.excel_name = excelcsv + '.' + sheet_name
                        tablename = tablename.lower()
                        # cut off table name (MySQL identifiers max 64 chars)
                        if len(tablename.encode("utf8")) > 64:
                            if self.is_Chinese(tablename):
                                # CJK chars take 3 bytes each in utf8, so
                                # keep fewer characters.
                                tablename = "{0}_".format(
                                    long_num) + tablename[:20]
                            else:
                                tablename = "{0}_".format(
                                    long_num) + tablename[:60]
                            long_num += 1
                            longexcelcsvs[excelcsv] = tablename
                            with open(log_file, "a", encoding="utf8") as fw:
                                fw.write(
                                    "extra long excel: {0}, tablename: {1}\n".
                                    format(self.excel_name, tablename))

                        col_maxlen, dataset = self.read_data(dataset)

                        if dataset.empty:
                            raise EmptyError("Empty")
                        created_table, created_sql = self.create_table(
                            col_maxlen, tablename)

                        try:
                            self.insert_data(dataset, tablename)
                        except Exception as reason:
                            # pymysql error text looks like "(1366, '...')";
                            # pull out the leading numeric error code.
                            reason_num_0 = str(reason).split(",")[0].strip("(")
                            if reason_num_0 == "1366":
                                # 1366 -- presumably "incorrect string value"
                                # for the column charset: convert the table
                                # to utf8mb4 and retry the insert.
                                try:
                                    sql_0 = "alter table {0} convert to character set utf8mb4 collate utf8mb4_bin".format(
                                        created_table)
                                    self.ConnDB.exec(self.conn_db, sql_0)
                                    self.insert_data(dataset,
                                                     tablename,
                                                     charset="utf8mb4")

                                except pymysql.err.InternalError as reason:
                                    reason_num_1 = str(reason).split(
                                        ",")[0].strip("(")
                                    if reason_num_1 == "1118":
                                        # 1118 -- row size too large: rebuild
                                        # the table with text columns instead
                                        # of varchar, then retry as utf8mb4.
                                        sql = re.sub(r"varchar\(\d+\)", "text",
                                                     created_sql)
                                        sql_1 = "drop table if exists {0};".format(
                                            tablename)
                                        # NOTE(review): this prints the earlier
                                        # ALTER statement (sql_0); sql_1 (the
                                        # DROP) was probably intended -- confirm.
                                        print(sql_0)
                                        self.ConnDB.exec(self.conn_db, sql_1)
                                        self.ConnDB.exec(self.conn_db, sql)

                                        sql_0 = "alter table {0} convert to character set utf8mb4 collate utf8mb4_bin".format(
                                            created_table)
                                        self.ConnDB.exec(self.conn_db, sql_0)
                                        self.insert_data(dataset,
                                                         tablename,
                                                         charset="utf8mb4")
                                    else:
                                        raise pymysql.err.InternalError(
                                            str(reason))

                            elif reason_num_0 == "1118":
                                # Row too large: recreate with text columns.
                                sql = re.sub(r"varchar\(\d+\)", "text",
                                             created_sql)
                                sql_0 = "drop table if exists {0};".format(
                                    tablename) + sql
                                self.ConnDB.exec(self.conn_db, sql_0)
                                self.insert_data(dataset, tablename)

                            else:
                                raise pymysql.err.InternalError(str(reason))

                    except Exception as reason:
                        # Log the failed sheet and drop its half-made table.
                        print("Failed: {}".format(self.excel_name))

                        with open(log_file, "a", encoding="utf8") as fw:
                            fw.write(
                                "excel sheet name: {0}, error: {1}\n".format(
                                    self.excel_name, str(reason)))

                        if created_table:
                            sql = "drop table if exists {0}".format(
                                created_table)
                            self.ConnDB.exec(self.conn_db, sql)

                        continue

                    else:
                        print("Imported: {}".format(self.excel_name))
                        num_s += 1

                    finally:
                        num += 1

            except Exception as reason:
                # Reading the file itself failed (bad encoding, corrupt
                # workbook, ...).
                print("Failed: {}".format(excelcsv))
                with open(log_file, "a", encoding="utf8") as fw:
                    fw.write("excel file name: {0}, error: {1}\n".format(
                        self.excel_name, str(reason)))
                num += 1
                continue

        print("\nTotal: {}, Imported: {}\n".format(num, num_s))
        self.conn.close()
        self.conn_db.close()

        conti = 1
        if os.path.isfile(log_file):
            # Open the log for the user and let them decide whether to
            # continue with the cleanup step.
            os.popen(log_file)
            easygui.msgbox(
                "You have logs , see file '{}' \n\ncheck it first".format(
                    log_file))
            if num_s == 0:
                easygui.msgbox("No imported tables!")
                return
            conti = easygui.ccbox(msg="Clean database {} now?".format(self.db))

        if conti:
            self.clean_data()

    def is_Chinese(self, word):
        """Return True if *word* contains at least one CJK ideograph."""
        for ch in word:
            if '\u4e00' <= ch <= '\u9fff':
                return True
        return False

    def get_excel(self):
        """Return {file name: table name} for importable files.

        Scans ``self.importexcelpath`` for .xls/.xlsx/.csv files; the
        table name is the lower-cased file name minus its extension with
        every non-word run collapsed to '_'.
        """
        # a function to get excel/csv file under the dictionary
        excels = os.listdir(self.importexcelpath)
        excelcsvs = defaultdict()

        for excel in excels:
            excel_dir = self.importexcelpath + "\\" + excel
            if os.path.isfile(excel_dir) and re.fullmatch(
                    r"^.*?\.(xls|xlsx|csv)$", excel, flags=re.IGNORECASE):
                tablename = re.sub(r"\.(xls|xlsx|csv)$",
                                   '',
                                   excel.lower(),
                                   flags=re.IGNORECASE)
                # replace all character not \w to "_"
                tablename = re.sub(r"[^\w]+",
                                   "_",
                                   tablename,
                                   flags=re.IGNORECASE)
                excelcsvs[excel] = tablename
        return excelcsvs

    def read_data(self, dataset):
        """Normalise a DataFrame for import.

        Cleans column names (strip, '%'->'_', cut to the identifier
        length limit, de-duplicate case-insensitively), drops all-empty
        rows, converts "" cells to None, and returns
        ``(col_maxlen, dataset)`` where *col_maxlen* maps each column to
        its longest utf8-encoded value length in bytes.  Also stores the
        final column list on ``self.columns`` for insert_data().
        """
        # str col
        dataset.columns = [str(col) for col in dataset.columns]
        self.columns = dataset.columns
        # replace % to _
        self.columns = [
            str(col).strip().replace('%', '_') for col in self.columns
        ]

        # cut off col (keep within MySQL's identifier length limit)
        def f(x):
            if len(x.encode("utf8")) <= 63:
                x = x
            elif self.is_Chinese(x):
                x = x[:20].strip()
            else:
                x = x[:62].strip()
            return x

        self.columns = [f(col) for col in self.columns]

        # fix duplicate column name: append 1, 2, ... to later duplicates
        # (case-insensitive) and repeat until a full pass changes nothing.
        while True:
            low_col = [col.lower() for col in self.columns]
            idx = 0
            odx = 0
            c = 0
            for i in self.columns:
                jdx = 0
                n = 1
                if idx == len(self.columns):
                    continue
                for j in low_col[idx + 1:]:
                    odx = idx + 1 + jdx
                    if j == i.lower():
                        self.columns[odx] = j + str(n)
                        n += 1
                        c += 1
                    jdx += 1
                idx += 1
            if c == 0:
                break

        dataset.columns = self.columns
        self.columns = np.array(self.columns)
        self.columns = self.columns.tolist()

        # deal with data
        f = lambda x: str(x).strip()
        dataset = dataset.applymap(f)
        # Per-cell utf8 byte length, used both for column sizing and for
        # spotting completely empty rows.
        f = lambda x: len(x.encode("utf8"))
        df1 = dataset.applymap(f)
        f = lambda x: max(x)
        df2 = df1.apply(f, axis=0)
        col_maxlen = df2.to_dict()

        # Drop rows whose longest cell is empty (i.e. all-blank rows).
        df3 = df1.apply(f, axis=1)
        df3 = pd.DataFrame(df3, columns=["c"])
        indexs = df3.loc[df3["c"] == 0].index
        dataset.drop(indexs, inplace=True)

        # Empty strings become SQL NULLs on insert.
        f = lambda x: None if x == "" else x
        dataset = dataset.applymap(f)

        return col_maxlen, dataset

    def create_table(self, col_maxlen, tablename):
        """Create *tablename* sized from *col_maxlen*; return (name, sql)."""
        sql = "create table {0}(".format(tablename)
        for col, maxLen in col_maxlen.items():
            # Smallest column type that fits the longest value seen.
            colType = "varchar(255)"
            if maxLen > 255:
                colType = "TEXT"
            if maxLen > 65535:
                colType = "MEDIUMTEXT"
            if maxLen > 16777215:
                colType = "LONGTEXT"

            sql = sql + "`{0}` {1} default null,".format(col, colType)

        sql = sql[:-1] + ")"

        try:
            self.ConnDB.exec(self.conn_db, sql)
        except:
            # Fall back to all-text columns when creation fails.
            # NOTE(review): bare except also hides unrelated errors.
            sql = re.sub(r"varchar\(\d+\)", "text", sql)
            self.ConnDB.exec(self.conn_db, sql)

        return tablename, sql

    def insert_data(self, dataset, tablename, charset='utf8'):
        """Bulk-insert *dataset* into *tablename* via executemany.

        With charset='utf8mb4' a dedicated connection is opened (and
        closed) just for this insert; otherwise the shared
        ``self.conn_db`` connection is reused.  Uses the column list
        cached on ``self.columns`` by read_data().
        """
        # insert
        dataset = np.array(dataset)  # dataframe to ndarray
        datalist = dataset.tolist()  # ndarray to list
        cols = "`,`".join(self.columns)
        l = len(self.columns)
        v = "%s," * l
        v = v[:-1]
        sql = "insert into `%s`(%s) values(" % (tablename, "`" + cols + "`")
        sql = sql + "%s)" % v

        if charset == "utf8mb4":
            conn = self.ConnDB.conndb(db=self.db, charset=charset)
            cur = conn.cursor()
            cur.executemany(sql, datalist)
            conn.commit()
            cur.close()
            conn.close()
        else:
            cur = self.conn_db.cursor()
            cur.executemany(sql, datalist)
            self.conn_db.commit()
            cur.close()
            #conn.close()

    def clean_data(self):
        """Run the configured clean-up SQL script against the database."""
        print('Begin to clean data...\n')
        file = self.cleansql
        ret = self.ConnDB.cmd(self.db, "mysql", file)
        if ret == 0:
            easygui.msgbox(msg="Import Over", image=self.img)
        else:
            easygui.exceptionbox("Clean Data Failed")
Example #2
0
class ImportExcel:
    """Import Excel/CSV files for the current work item into MySQL.

    PySimpleGUI variant of the importer: the target database name comes
    from 'config.ini' and the database is always dropped and recreated.
    In *advanced* mode the user picks individual files; otherwise every
    importable file under the configured jira 'excel' directory is
    loaded.  Failures are appended to ``log.txt`` next to the sources.
    Relies on project helpers ``HandleConfig``, ``ConnDB`` and
    ``MysqlDump``.
    """

    def __init__(self):
        self.HandleConfig = HandleConfig()
        self.ConnDB = ConnDB()
        self.MysqlDump = MysqlDump()
        # SQL script run by clean_data() after the import finishes.
        self.cleansql = self.HandleConfig.handle_config(
            'g', 'referencefile', 'cleanexcel')
        # Prefix embedded into generated table names (see get_excel).
        self.nickname = self.HandleConfig.handle_config(
            'g', 'excelimporter', 'nickname')

    def main(self, currentwork, advanced=0):
        """Run the import for *currentwork*; *advanced* enables a file picker."""
        self.dbname = self.HandleConfig.handle_config('g', currentwork,
                                                      'dbname')
        # Strings to treat as NA while reading CSV/Excel files.
        na_values = self.HandleConfig.handle_config('g', 'excelimporter',
                                                    'na_values').split(',')
        na_values = [i.strip() for i in na_values]
        if advanced:
            # Let the user select individual files via a browse dialog.
            layout = [[sg.InputText('', key='e'),
                       sg.FilesBrowse()],
                      [
                          sg.Submit(tooltip='Click to submit this form'),
                          sg.Cancel()
                      ]]
            window = sg.Window(title=currentwork, layout=layout)
            event, values = window.read()
            window.close()
            if event in (None, 'Cancel'):
                return
            files = values['e']  # ';'-separated absolute paths
            files_list = files.split(';')
            files = []
            for f in files_list:
                files.append(f.split('/')[-1])
            self.importexcelpath = os.path.dirname(files_list[0]).replace(
                '/', '\\') + '\\'
            excelcsvs = self.get_excel(excels=files)
        else:
            # Default: import everything under the jira 'excel' folder.
            self.importexcelpath = self.HandleConfig.handle_config(
                'g', currentwork, 'jirapath') + 'excel\\'
            excelcsvs = self.get_excel()

        if not excelcsvs:
            sg.Popup('No excels can import!')
            return
        # Any file/sheet that fails to import is recorded in this log.
        log_file = self.importexcelpath + '\\log.txt'
        if os.path.isfile(log_file):
            os.remove(log_file)

        # Recreate the target database from scratch.
        sql = 'drop database if exists `{0}`;\ncreate database `{0}`'.format(
            self.dbname, self.dbname)
        conn_db = self.ConnDB.conndb()
        self.ConnDB.exec(conn_db, sql)
        conn_db.close()
        print('\n\n{}'.format(sql))
        self.conn = self.ConnDB.conndb(self.dbname)
        # Remember the session sql_mode so handlers below can temporarily
        # clear it and restore it afterwards.
        self.sql_mode = self.ConnDB.exec(
            self.conn, 'SELECT @@SESSION.sql_mode').fetchall()[0][0]

        longexcelcsvs = defaultdict()  # files whose table name was truncated
        long_num = 0  # counter keeping truncated names unique
        num = 0  # sheets/files attempted
        num_s = 0  # sheets/files imported successfully

        print('\n\nBegin to import...\n')
        for excelcsv, origin_tablename in excelcsvs.items():
            self.excel_name = excelcsv
            try:
                isexcel = 0
                if re.fullmatch('^.*?\\.csv$', excelcsv,
                                flags=(re.IGNORECASE)):
                    datasets = defaultdict()
                    csv = self.importexcelpath + '\\' + excelcsv

                    # Determining the encoding of a CSV file
                    # http://pandaproject.net/docs/determining-the-encoding-of-a-csv-file.html
                    # Try utf-8, then ansi, then utf-16; finally fall back
                    # to chardet detection.
                    try:
                        dataset = pd.read_csv(csv,
                                              encoding='utf-8',
                                              dtype=str,
                                              na_values=na_values,
                                              keep_default_na=False,
                                              header=0,
                                              engine='c')
                    except UnicodeDecodeError:
                        try:
                            dataset = pd.read_csv(csv,
                                                  encoding='ansi',
                                                  dtype=str,
                                                  na_values=na_values,
                                                  keep_default_na=False,
                                                  header=0,
                                                  engine='c')
                        except UnicodeDecodeError:
                            try:
                                dataset = pd.read_csv(csv,
                                                      encoding='utf-16',
                                                      dtype=str,
                                                      na_values=na_values,
                                                      keep_default_na=False,
                                                      header=0,
                                                      engine='c')
                            except UnicodeDecodeError:
                                # NOTE(review): 'bytes' shadows the builtin.
                                with open(csv, 'rb') as f:
                                    bytes = f.read()
                                    if len(bytes) > 100000:
                                        # Large file: detect from the first
                                        # line only.
                                        with open(csv, 'rb') as f:
                                            bytes = f.readline()
                                encode = chardet.detect(bytes)['encoding']
                                if encode == 'ascii':
                                    encode = 'ansi'  #ansi is a super charset of ascii
                                dataset = pd.read_csv(csv,
                                                      encoding=encode,
                                                      dtype=str,
                                                      na_filter=False,
                                                      header=0,
                                                      engine="c")
                    datasets['sheet1'] = dataset
                if re.fullmatch('^.*?\\.xlsx?$',
                                excelcsv,
                                flags=(re.IGNORECASE)):
                    isexcel = 1
                    excel = self.importexcelpath + '\\' + excelcsv
                    # sheet_name=None loads every sheet into a dict.
                    datasets = pd.read_excel(excel,
                                             dtype=str,
                                             na_values=na_values,
                                             keep_default_na=False,
                                             header=0,
                                             sheet_name=None)

                # One sheet/CSV becomes one table.
                for k, v in datasets.items():
                    created_table = None
                    try:

                        sheet_name = k
                        dataset = v
                        tablename = origin_tablename
                        self.excel_name = excelcsv
                        # Qualify the table name with the sheet name when a
                        # workbook has more than one sheet.
                        if isexcel == 1:
                            if len(datasets) > 1:
                                tablename = origin_tablename + '_' + re.sub(
                                    '[^0-9a-z]+',
                                    '_',
                                    sheet_name,
                                    flags=(re.IGNORECASE))
                                self.excel_name = excelcsv + '.' + sheet_name
                        tablename = tablename.lower()
                        # Cut over-long table names and keep them unique.
                        if len(tablename) > 55:
                            tablename = tablename[:51] + '_{0}'.format(
                                long_num)
                            long_num += 1
                            longexcelcsvs[excelcsv] = tablename
                            with open(log_file, 'a') as (fw):
                                fw.write(
                                    'extra long excel: {0}, tablename: {1}\n'.
                                    format(self.excel_name, tablename))
                        col_maxlen, dataset = self.read_data(dataset)
                        if dataset.empty:
                            raise EmptyError('Empty')
                        created_table, created_sql = self.create_table(
                            col_maxlen, tablename)
                        try:
                            self.insert_data(dataset, tablename)

                        except pymysql.err.InternalError as reason:

                            # pymysql error text looks like "(1366, '...')";
                            # extract the leading numeric error code.
                            reason_num_0 = str(reason).split(',')[0].strip('(')

                            if reason_num_0 == '1366':
                                # 1366 -- presumably a value rejected by the
                                # column charset / strict sql_mode: retry the
                                # whole insert with sql_mode cleared, then
                                # restore the saved mode.
                                try:
                                    sql_1 = 'truncate table `{0}`.`{1}`;'.format(
                                        self.dbname, tablename)
                                    self.ConnDB.exec(self.conn, sql_1)
                                    self.ConnDB.exec(
                                        self.conn, 'set SESSION sql_mode = ""')
                                    self.insert_data(dataset, tablename)
                                    self.ConnDB.exec(
                                        self.conn,
                                        'set SESSION sql_mode = "{}"'.format(
                                            self.sql_mode))

                                except pymysql.err.InternalError as reason:
                                    reason_num_1 = str(reason).split(
                                        ',')[0].strip('(')
                                    if reason_num_1 == '1118':
                                        # 1118 -- row too large: rebuild the
                                        # table with text columns and retry.
                                        sql = re.sub('varchar\\(\\d+\\)',
                                                     'text', created_sql)
                                        sql_1 = 'drop table if exists `{0}`.`{1}`;'.format(
                                            self.dbname, tablename)
                                        self.ConnDB.exec(self.conn, sql_1)
                                        self.ConnDB.exec(self.conn, sql)

                                        self.ConnDB.exec(
                                            self.conn,
                                            'set SESSION sql_mode = ""')
                                        self.insert_data(dataset, tablename)
                                        self.ConnDB.exec(
                                            self.conn,
                                            'set SESSION sql_mode = "{}"'.
                                            format(self.sql_mode))

                                    else:
                                        raise pymysql.err.InternalError(
                                            str(reason))

                            elif reason_num_0 == '1118':
                                # Row too large: recreate with text columns.
                                sql = re.sub('varchar\\(\\d+\\)', 'text',
                                             created_sql)
                                sql_0 = 'drop table if exists `{0}`.`{1}`;'.format(
                                    self.dbname, tablename) + sql
                                self.ConnDB.exec(self.conn, sql_0)
                                self.insert_data(dataset, tablename)
                            else:
                                raise pymysql.err.InternalError(str(reason))

                    except Exception as reason:
                        # Log the failed sheet and drop its half-made table.
                        print('Failed: {}'.format(self.excel_name))
                        with open(log_file, 'a') as (fw):
                            fw.write(
                                'excel sheet name: {0}, error: {1}\n'.format(
                                    self.excel_name, str(reason)))
                        if created_table:
                            sql = 'drop table if exists `{0}`.`{1}`'.format(
                                self.dbname, created_table)
                            self.ConnDB.exec(self.conn, sql)
                        continue

                    else:
                        print('Imported: {}'.format(self.excel_name))
                        num_s += 1
                    finally:
                        num += 1
                        # Always restore the saved sql_mode for this session.
                        self.ConnDB.exec(
                            self.conn, 'set SESSION sql_mode = "{}"'.format(
                                self.sql_mode))

            except Exception as reason:
                # Reading the file itself failed (bad encoding, corrupt
                # workbook, ...).
                print('Failed: {}'.format(excelcsv))
                with open(log_file, 'a') as (fw):
                    fw.write('excel file name: {0}, error: {1}\n'.format(
                        self.excel_name, str(reason)))
                num += 1
                continue

        print('\nTotal: {}, Imported: {}\n'.format(num, num_s))

        if os.path.isfile(log_file):
            # Show the log and let the user abort before dumping.
            os.popen(log_file)
            ret = sg.Popup(
                'You have logs. Check them first!\n\nPress OK to continue to dump database if you have checked.',
                title=currentwork)
            if ret is None:
                return
        if num_s == 0:
            sg.Popup('No imported tables!', title=currentwork)
            return

        self.clean_data()
        if advanced:
            # Optional per-work SQL script applied after cleaning.
            sqlfile = self.HandleConfig.handle_config(
                'g', currentwork, 'jirapath') + 'script\\{0}.sql'.format(
                    self.dbname)
            if os.path.exists(sqlfile):
                self.ConnDB.cmd(self.dbname, 'mysql', sqlfile)

        self.MysqlDump.main(currentwork)
        sg.Popup('Import Excel Complete!', title=currentwork)

    def get_excel(self, excels=None):
        """Return {file name: table name} for importable files.

        *excels* is an optional pre-selected list of file names; when
        omitted, ``self.importexcelpath`` is scanned.  Table names are
        prefixed 'z_excel_<nickname>_' with non-alphanumeric runs
        collapsed to '_'.
        """
        if not excels:
            excels = os.listdir(self.importexcelpath)
        excelcsvs = defaultdict()
        for excel in excels:
            excel_dir = self.importexcelpath + '\\' + excel
            if os.path.isfile(excel_dir) and re.fullmatch(
                    '^.*?\\.(xls|xlsx|csv)$', excel, flags=re.IGNORECASE):
                tablename = re.sub('\\.(xls|xlsx|csv)$',
                                   '', (excel.lower()),
                                   flags=re.IGNORECASE)
                tablename = 'z_excel_' + self.nickname + '_' + re.sub(
                    '[^0-9a-z]+', '_', tablename, flags=re.IGNORECASE)
                excelcsvs[excel] = tablename

        return excelcsvs

    def read_data(self, dataset):
        """Normalise a DataFrame for import.

        Drops all-empty rows, promotes the first data row to header when
        pandas mislabelled it (trailing 'unnamed: N' column or an
        'ignore' marker), cleans and de-duplicates column names, converts
        "" cells to None, and returns ``(col_maxlen, dataset)`` where
        *col_maxlen* maps each column to its longest value length.  Also
        stores the final column list on ``self.columns`` for
        insert_data().
        """
        dataset = dataset.fillna(value="")
        f = lambda x: str(x).strip()
        dataset = dataset.applymap(f)
        # Per-cell string length, reused later for column sizing.
        f = lambda x: len(x)
        df1 = dataset.applymap(f)
        f = lambda x: max(x)

        # Drop rows whose longest cell is empty (all-blank rows).
        df3 = df1.apply(f, axis=1)
        df3 = pd.DataFrame(df3, columns=['c'])
        indexs = df3.loc[(df3['c'] == 0)].index
        dataset.drop(indexs, inplace=True)

        # deal with columns
        dataset.columns = [str(col) for col in dataset.columns]
        self.columns = dataset.columns
        low_col = [col.lower() for col in self.columns]
        s = len(low_col)
        # Heuristic: a trailing 'unnamed: N' header means the real header
        # is in the first data row -- promote it.
        if 'unnamed: {}'.format(s - 1) in low_col:
            self.columns = dataset[0:1]
            self.columns = np.array(self.columns)
            self.columns = self.columns.tolist()[0]
            dataset.columns = self.columns
            dataset.drop(dataset[:1].index, inplace=True)
            low_col = [col.lower() for col in self.columns]

        # An 'ignore' header likewise marks a bogus header row.
        if 'ignore' in low_col:
            self.columns = dataset[0:1]
            self.columns = np.array(self.columns)
            self.columns = self.columns.tolist()[0]
            dataset.columns = self.columns
            dataset.drop(dataset[:1].index, inplace=True)

        self.columns = [str(col).strip() for col in self.columns]
        # fix blank col name
        f = lambda x: "unnamed" if x == "" else x
        self.columns = [f(col) for col in self.columns]

        # Cut over-long column names (identifier length limit).
        f = lambda x: x if len(x) <= 63 else x[:62].strip()
        self.columns = [f(col) for col in self.columns]
        # fix duplicate column name: append 1, 2, ... to later duplicates
        # (case-insensitive) until a full pass changes nothing.
        while 1:
            low_col = [col.lower() for col in self.columns]
            idx = 0
            odx = 0
            c = 0
            for i in self.columns:
                jdx = 0
                n = 1
                if idx == len(self.columns):
                    continue
                for j in low_col[idx + 1:]:
                    odx = idx + 1 + jdx
                    if j == i.lower():
                        self.columns[odx] = j + str(n)
                        n += 1
                        c += 1
                    jdx += 1

                idx += 1

            if c == 0:
                break

        dataset.columns = self.columns
        self.columns = np.array(self.columns)
        self.columns = self.columns.tolist()

        # Longest value per column, used by create_table for sizing.
        f = lambda x: max(x)
        df1.columns = self.columns
        df2 = df1.apply(f, axis=0)
        col_maxlen = df2.to_dict()

        # Empty strings become SQL NULLs on insert.
        f = lambda x: None if x == "" else x
        dataset = dataset.applymap(f)

        return col_maxlen, dataset

    def create_table(self, col_maxlen, tablename):
        """Create `dbname`.`tablename` sized from *col_maxlen*; return (name, sql)."""
        sql = 'create table `{0}`.`{1}`('.format(self.dbname, tablename)
        for col, maxLen in col_maxlen.items():
            # Smallest column type that fits the longest value seen.
            colType = 'varchar(255)'
            if maxLen > 255:
                colType = 'TEXT'
            if maxLen > 65535:
                colType = 'MEDIUMTEXT'
            if maxLen > 16777215:
                colType = 'LONGTEXT'
            sql = sql + '`{0}` {1} default null,'.format(col, colType)

        sql = sql[:-1] + ')'
        try:
            self.ConnDB.exec(self.conn, sql)
        except pymysql.InternalError:
            # Fall back to all-text columns when creation fails.
            sql = re.sub('varchar\\(\\d+\\)', 'text', sql)
            self.ConnDB.exec(self.conn, sql)

        return (tablename, sql)

    def insert_data(self, dataset, tablename):
        """Bulk-insert *dataset* into `dbname`.`tablename`.

        Uses the column list cached on ``self.columns`` by read_data().
        """
        dataset = np.array(dataset)
        datalist = dataset.tolist()
        cols = '`,`'.join(self.columns)
        l = len(self.columns)
        v = '%s,' * l
        v = v[:-1]

        sql = 'insert into `%s`.`%s`(%s) values(' % (self.dbname, tablename,
                                                     '`' + cols + '`')
        sql = sql + '%s)' % v
        self.ConnDB.exec(self.conn, sql, datalist=datalist)

    def clean_data(self):
        """Run the configured clean-up SQL script against the database."""
        print('Begin to clean data...\n')
        file = self.cleansql
        ret = self.ConnDB.cmd(self.dbname, 'mysql', file)
        if ret == 0:
            print('Succeed: Clean data\n')
        else:
            sg.Popup('Clean Data Failed')
Example #3
0
class MysqlRestore:
    """Restore or rebuild a work item's MySQL database from backups and
    SQL script files, driven by PySimpleGUI dialogs.

    Relies on project helpers: HandleConfig ('g' = get / 's' = set config
    values), MysqlDump (dump + gzip + upload) and ConnDB (connection,
    raw exec and a mysql command-line wrapper).
    """

    def __init__(self):
        # Project helper instances plus one open connection reused for raw
        # SQL (drop/create database); script files run through ConnDB.cmd.
        self.HandleConfig = HandleConfig()
        self.MysqlDump = MysqlDump()
        self.ConnDB = ConnDB()
        self.conn = self.ConnDB.conndb()

    def main(self, currentwork, advanced=1):
        """Restore the database configured for *currentwork*.

        advanced=0 -- silent restore: gunzip the saved backup and run it
            together with configuration/function/smoke-test scripts.
        advanced=1 -- small dialog: restore to localhost, or dump and
            upload the database to the AWS hotfix host.
        advanced=2 -- checkbox dialog: run any combination of template /
            merge / clean / smoke-test scripts, then report smoke-test
            results in a table.

        Returns None normally, 0 when a script fails to execute.
        """
        # Paths and flags for this work item, all read from the config file.
        jirapath = self.HandleConfig.handle_config('g', currentwork,
                                                   'jirapath')
        dbname = self.HandleConfig.handle_config('g', currentwork, 'dbname')
        git_repo_path = self.HandleConfig.handle_config(
            'g', 'defaultpath', 'git_repo_path')
        merge = self.HandleConfig.handle_config('g', currentwork, 'merge')
        navicat_script_path = self.HandleConfig.handle_config(
            'g', 'defaultpath', 'navicat_script_path')

        gitscriptpath = git_repo_path + 'dataImportScript\\script\\'
        gittemplatepath = git_repo_path + 'templates\\'
        scriptspath = jirapath + 'script\\'
        scripts_bak_path = jirapath + 'db_backup\\'
        # All selected scripts are concatenated into tmp.txt and run as one.
        tmpscript = scriptspath + 'tmp.txt'
        script = navicat_script_path + dbname + '\\{}.sql'.format(dbname)
        backupsql = scriptspath + '{0}_after.sql'.format(dbname)
        excel_sql_file = scriptspath + '{0}.sql'.format(dbname)

        # truncate tmp.txt first
        with open(tmpscript, 'w', encoding='utf8') as (fw):
            fw.truncate()

        # files: scripts appended into tmp.txt and run last;
        # backup_files: dump files run directly, before the scripts;
        # drop: whether to drop/recreate the database first.
        files = []
        drop = 1
        backup_files = [backupsql]
        msg = ''

        if not advanced:
            # restore database in config
            backupgz = scripts_bak_path + '{}_backup.sql.gz'.format(dbname)
            backupsql = scripts_bak_path + '{0}_backup.sql'.format(dbname)
            # Decompress the gzipped backup in place before restoring.
            with gzip.open(backupgz, 'rb') as (f_in):
                with open(backupsql, 'wb') as (f_out):
                    shutil.copyfileobj(f_in, f_out)
            backup_files = [backupsql, scriptspath + '{0}.sql'.format(dbname)]
            files = [
                scriptspath + 'configration.sql',
                gitscriptpath + 'functionAndProcedure.sql',
                gitscriptpath + 'smokeTestV2.sql'
            ]

            # get b4_size
            # Record the compressed backup size (MB, rounded) in the config;
            # MysqlDump later compares the post-import dump against it.
            b4_size = os.path.getsize(backupgz) / 1024 / 1024
            b4_size = '{:.0f}'.format(b4_size)
            self.HandleConfig.handle_config('s', currentwork, 'b4_size',
                                            b4_size)

        elif advanced == 1:
            # restore database
            msg = 'Restore Database Complete!'
            layout = [[sg.Button('RestoreToLocalHost')],
                      [sg.Button('RestoreToAWSHotfix')]]

            window = sg.Window(title=currentwork, layout=layout)
            event, values = window.read()
            window.close()

            if event is None:
                # Window closed -- abort without touching the database.
                return

            elif event == 'RestoreToLocalHost':
                # Fall through: plain restore of backup_files below.
                pass

            elif event == 'RestoreToAWSHotfix':
                # Dump + gzip the current database, then push the archive to
                # the AWS host via a saved WinSCP session ('aws188' --
                # presumably configured in WinSCP; TODO confirm).
                self.MysqlDump.main(currentwork, gz=True)
                winscppath = self.HandleConfig.handle_config(
                    'g', 'defaultpath', 'winscppath')
                db_backup = self.HandleConfig.handle_config(
                    'g', currentwork,
                    'jirapath') + 'db_backup\\{0}.sql.gz'.format(dbname)
                cmd = '{0}WinSCP.com /command "open aws188" "put {1} /home/neon/leiwu/dataimport/fulldata/" "exit"'.format(
                    winscppath, db_backup)
                os.system(cmd)
                sg.popup('Restore To AWS Hotfix Complete')
                return

        elif advanced == 2:
            # restore database in run script
            msg = 'Runscript Database Complete!'

            # Most checkbox keys are the script file paths themselves, so a
            # checked entry can be appended to `files` directly below.
            layout = [
                [
                    sg.Checkbox('AllScriptWithoutSmokeTest',
                                key='AllScriptWithoutSmokeTest')
                ], [sg.Checkbox('ExcelSqlFile', key=excel_sql_file)],
                [sg.Checkbox('TemplateScript', key=script)],
                [sg.Checkbox('TemplateSmokeTest', key='TemplateSmokeTest')],
                [
                    sg.Checkbox('TemplateScriptAfter',
                                key=navicat_script_path + dbname +
                                '\\{}_after_template.sql'.format(dbname))
                ],
                [
                    sg.Checkbox('AccountMerge',
                                key=gitscriptpath + 'accountMerge.sql')
                ],
                [
                    sg.Checkbox('AccountMergeAfter',
                                key=navicat_script_path + dbname +
                                '\\{}_after_merge.sql'.format(dbname))
                ],
                [
                    sg.Checkbox('dataClean',
                                key=gitscriptpath + 'fullDataClean.sql')
                ],
                [
                    sg.Checkbox('SmokeTest',
                                key=gitscriptpath + 'smokeTestV2.sql')
                ],
                [
                    # NOTE(review): .format(dbname) is a no-op here -- the
                    # label string has no placeholder.
                    sg.Checkbox(
                        'All Script and Restore To AWS Hotfix'.format(dbname),
                        key='AllScript',
                        default=True,
                        text_color='blue')
                ],
                [
                    sg.Checkbox('Drop database if exists {}'.format(dbname),
                                key='drop',
                                default=True,
                                text_color='blue')
                ], [sg.Submit(), sg.Cancel()]
            ]
            window = sg.Window(title=currentwork, layout=layout)
            event, values = window.read()
            window.close()

            if event in (None, 'Cancel'):
                return

            if values['drop']:
                drop = 1
            else:
                drop = 0
                # Keeping the database: skip the after-backup restore too.
                backup_files = []

            if values['AllScript'] or values['AllScriptWithoutSmokeTest']:
                # Full pipeline, in fixed order: template script, after-fixes,
                # merge, clean, smoke test, clear.
                files = [
                    script, navicat_script_path + dbname +
                    '\\{}_after_template.sql'.format(dbname),
                    gitscriptpath + 'accountMerge.sql', navicat_script_path +
                    dbname + '\\{}_after_merge.sql'.format(dbname),
                    gitscriptpath + 'fullDataClean.sql',
                    gitscriptpath + 'smokeTestV2.sql',
                    gitscriptpath + 'clear.sql'
                ]
                if merge == 'False':
                    files.remove(gitscriptpath + 'accountMerge.sql')
                if values['AllScriptWithoutSmokeTest']:
                    files.remove(gitscriptpath + 'smokeTestV2.sql')

                # "All" options are exclusive of the individual checkboxes.
                for k, v in values.items():
                    if v:
                        if k != 'AllScript' and k != 'AllScriptWithoutSmokeTest' and k != 'drop':
                            sg.PopupError('Confict Selection!')
                            return

            else:
                # Individual selections only: no backup restore, just the
                # checked script files (keys are paths except the two below).
                backup_files = []
                for k, v in values.items():
                    if v:
                        if k == 'TemplateSmokeTest':
                            files = files + [
                                gittemplatepath +
                                'template_smoke_test\\smoke_test_for_account.sql',
                                gittemplatepath +
                                'template_smoke_test\\smoke_test_for_custom_field.sql',
                                gittemplatepath +
                                'template_smoke_test\\smoke_test_for_gais.sql',
                                gittemplatepath +
                                'template_smoke_test\\smoke_test_for_transaction.sql'
                            ]
                        elif k == 'drop':
                            continue
                        else:
                            files.append(k)

        # Concatenate every selected script into tmp.txt (newline-separated)
        # so the whole batch runs as a single mysql invocation.
        sqlfiles = backup_files
        if len(files) > 0:
            for f in files:
                with open(f, 'r', encoding='utf8') as (fa):
                    with open(tmpscript, 'a+', encoding='utf8') as (fw):
                        fw.writelines(['\n'])
                        shutil.copyfileobj(fa, fw)
            sqlfiles = sqlfiles + [tmpscript]
            #sqlfiles = sqlfiles + files

        if drop:
            # Start from an empty database before replaying the files.
            sql = 'drop database if exists `{}`;\ncreate database `{}`;'.format(
                dbname, dbname)
            self.ConnDB.exec(self.conn, sql)
            print('\n\n{}'.format(sql))

        # Run each file via the mysql client; missing files are skipped
        # silently, the first failure aborts with 0.
        for sqlfile in sqlfiles:
            if not os.path.exists(sqlfile):
                continue
            ret = self.ConnDB.cmd(dbname, 'mysql', sqlfile)
            if ret == 0:
                continue
            else:
                sg.popup_error('Error!')
                return 0

        if advanced == 2 and values['AllScript']:
            # Full pipeline: show smoke-test results, then (on OK) dump the
            # database and upload the gzip to the AWS hotfix host.
            conn = self.ConnDB.conndb(dbname)
            sql = 'SELECT * FROM smoke;'
            ret = self.ConnDB.exec(conn, sql)
            result = ret.fetchall()
            if len(result) > 0:
                layout = [
                    [
                        sg.Table(result, [
                            'errorMsg', 'Error Count After', 'Error Count Pre',
                            'Pass Flag'
                        ],
                                 col_widths=[60, 15, 15, 15],
                                 auto_size_columns=False,
                                 justification="left")
                    ],
                    [
                        sg.Text(
                            'Press OK to continue to dump database and upload to winscp.'
                        )
                    ], [sg.OK(), sg.Cancel()]
                ]
                window = sg.Window(title=currentwork, layout=layout)
                event, values = window.read()
                window.close()
                if event in ('Cancel', None):
                    return
            self.MysqlDump.main(currentwork, gz=True)
            winscppath = self.HandleConfig.handle_config(
                'g', 'defaultpath', 'winscppath')
            db_backup = self.HandleConfig.handle_config(
                'g', currentwork,
                'jirapath') + 'db_backup\\{0}.sql.gz'.format(dbname)
            cmd = '{0}WinSCP.com /command "open aws188" "put {1} /home/neon/leiwu/dataimport/fulldata/" "exit"'.format(
                winscppath, db_backup)
            os.system(cmd)
        elif advanced == 2 and values['TemplateSmokeTest']:
            # Template smoke test: show only non-passing rows, with a button
            # to copy the underlying query to the clipboard.
            conn = self.ConnDB.conndb(dbname)
            sql = 'SELECT templateTableName, errorMsg, errorCount, passFlag FROM smoke_test_report_for_template WHERE passFlag<>"Pass" ORDER BY passFlag;'
            ret = self.ConnDB.exec(conn, sql)
            result = ret.fetchall()
            if len(result) > 0:
                layout = [
                    [
                        sg.Table(result, [
                            'templateTableName', 'errorMsg', 'Error Count',
                            'Pass Flag'
                        ],
                                 col_widths=[30, 100, 15, 15],
                                 auto_size_columns=False,
                                 justification="left")
                    ],
                    [
                        sg.Text(
                            'SELECT * FROM smoke_test_report_for_template WHERE passFlag<>"Pass" ORDER BY passFlag;'
                        ),
                        sg.Button('Copy')
                    ]
                ]
                window1 = sg.Window(title=currentwork, layout=layout)
                event1, values1 = window1.read()
                window1.close()
                if event1 is None:
                    return
                if event1 == 'Copy':
                    pyperclip.copy(
                        'SELECT * FROM smoke_test_report_for_template WHERE passFlag<>"Pass" ORDER BY passFlag;'
                    )
        elif advanced == 2 and values[gitscriptpath + 'smokeTestV2.sql']:
            # Standalone smoke test: just display the smoke table.
            conn = self.ConnDB.conndb(dbname)
            sql = 'SELECT * FROM smoke;'
            ret = self.ConnDB.exec(conn, sql)
            result = ret.fetchall()
            if len(result) > 0:
                layout = [
                    [
                        sg.Table(result, [
                            'errorMsg', 'Error Count After', 'Error Count Pre',
                            'Pass Flag'
                        ],
                                 col_widths=[100, 15, 15, 15],
                                 auto_size_columns=False,
                                 justification="left")
                    ],
                ]
                window1 = sg.Window(title=currentwork, layout=layout)
                # NOTE(review): this overwrites `values` (the checkbox dict)
                # with the table window's values -- harmless here since it is
                # not read again, but fragile; should be values1.
                event1, values = window1.read()
                window1.close()

        if advanced:
            sg.Popup(msg, title=currentwork)
Example #4
0
class MysqlDump:
    """Dump a work item's MySQL database to .sql files (optionally gzipped)
    via the project's ConnDB command wrapper."""

    def __init__(self):
        self.HandleConfig = HandleConfig()
        self.ConnDB = ConnDB()

    def main(self,
             currentwork,
             advanced=0,
             gz=False,
             after=0,
             op='mysqldump-no-r'):
        """Dump the database configured for *currentwork*.

        advanced -- pop a dialog asking for specific tables and/or an
                    output file name.
        gz       -- drop scratch z_* tables, dump the whole database and
                    gzip it into the jira db_backup folder; warn when the
                    dump grew past 100MB while the source backup was
                    smaller (b4_size from the config).
        after    -- dump to '{db}_after.sql' instead of '{db}.sql'.
        op       -- operation name passed to ConnDB.cmd; 'mysqldump-no-r'
                    presumably means mysqldump without stored routines
                    (TODO confirm against ConnDB.cmd).
        """
        jirapath = self.HandleConfig.handle_config('g', currentwork,
                                                   'jirapath')
        dbname = self.HandleConfig.handle_config('g', currentwork, 'dbname')
        sqlname = '{0}.sql'.format(dbname)
        sqlfile = jirapath + 'script\\' + sqlname
        if after:
            sqlfile = jirapath + 'script\\' + '{0}_after.sql'.format(dbname)
        if not op:
            op = 'mysqldump-no-r'
        tablesname = ''

        if advanced:
            # Ask which tables to dump and where; empty answers keep the
            # defaults (whole database, standard file name).
            layout = [
                [sg.Text('Database Name: {0}'.format(dbname))],
                [sg.Text('Tables Name:'),
                 sg.InputText(key='tablesname')],
                [sg.Text('File Name:'),
                 sg.InputText(key='filename')],
                [sg.Submit(tooltip='Click to submit this form'),
                 sg.Cancel()]
            ]
            window = sg.Window('', layout=layout)
            event, values = window.read()
            window.close()
            if event in (None, 'Cancel'):
                return
            tablesname = values['tablesname']
            filename = values['filename']
            if filename:
                sqlfile = jirapath + 'script\\' + filename

        if gz:

            # Find the temporary z_* working tables -- excluding backups,
            # dated excel/newcreate snapshots and a long explicit keep-list
            # -- so they can be dropped before the final dump.
            sql = """select TABLE_NAME from information_schema.TABLES t where TABLE_SCHEMA =database() and TABLE_NAME like 'z\_%' and TABLE_NAME NOT like 'z\_backup%' and TABLE_NAME NOT REGEXP '^z_excel_.*_20[0-9]{6}$' and TABLE_NAME NOT REGEXP '^z_newcreate_.*_20[0-9]{6}$' AND table_name NOT IN('z_newcreate_sequence_number_table','z_newcreate_revert_group_concat_bak','z_newcreate_extra_campaign','z_newcreate_extra_fund','z_newcreate_extra_purpose','z_newcreate_extra_tender','z_newcreate_extra_event','z_newcreate_extra_event_category_cd','z_newcreate_extra_membership_term','z_newcreate_extra_fundraiser','z_newcreate_all_custom_data_smoke_test', 'z_newcreate_all_custom_data_options_smoke_test', 'z_newcreate_wxm_membership_email_condition_smoke_test', 'z_newcreate_wxm_membershipEmailCondition_sep_smoke_test', 'z_newcreate_wxm_membershipLetterCondi_smoke_test', 'z_newcreate_wxm_membershipLetterCondi_sep_smoke_test', 'z_newcreate_wxm_shopping_cart_items_smoke_test', 'z_newcreate_prepare_statement_to_proceed_smoke_test', 'z_newcreate_wxm_may_be_drop_down_smoke_test', 'z_newcreate_cutoff_columns_smoke_test', 'z_newcreate_wxm_donation_smoke_test', 'z_newcreate_wxm_credit_card_id_smoke_test', 'z_newcreate_max_id_b4import_for_system_tables', 'z_newcreate_prepare_statement_to_proceed', 'z_newcreate_match_multiple_company', 'z_smoke_test_newcreate_past_duplicate_custom_field', 'z_newcreate_current_duplicate_custom_field_smoke_test', 'z_newcreate_check_table_name_column','z_newcreate_all_onelinetext_custom_field');"""
            conn = self.ConnDB.conndb(dbname)
            result = self.ConnDB.exec(conn, sql)
            result = ['drop table if exists ' + t[0] for t in result]
            sql = ';'.join(result)
            self.ConnDB.exec(conn, sql)

            sqlfile = jirapath + 'db_backup\\' + sqlname
            # NOTE(review): '\\db_backup\\' doubles the separator if jirapath
            # already ends with '\\' (cf. the line above) -- Windows tolerates
            # it, but the paths are inconsistent.
            sqlfile_gzip = jirapath + '\\db_backup\\{0}.sql.gz'.format(dbname)
            op = 'mysqldump'
            ret = self.ConnDB.cmd(dbname, op, sqlfile)
            if ret == 0:
                # Compress the dump and remove the plain .sql.
                with open(sqlfile, 'rb') as (f_in):
                    with gzip.open(sqlfile_gzip, 'wb') as (f_out):
                        shutil.copyfileobj(f_in, f_out)
                os.remove(sqlfile)

            # get after_size
            # NOTE(review): if the dump failed (ret != 0) the gzip may not
            # exist and getsize raises unguarded here.
            after_size = os.path.getsize(sqlfile_gzip) / 1024 / 1024
            after_size = int('{:.0f}'.format(after_size))
            try:
                # Compare against the pre-import backup size recorded by
                # MysqlRestore; warn when the data crossed the 100MB mark.
                b4_size = int(
                    self.HandleConfig.handle_config('g', currentwork,
                                                    'b4_size'))
                if b4_size < 100 and after_size >= 100:
                    sql = "DROP TABLE IF EXISTS z_newcreate_table_row;CREATE TABLE z_newcreate_table_row SELECT 'account' tableName, (SELECT count(*) FROM account) tableRow " \
                          "UNION ALL SELECT 'donation' tableName, (SELECT count(*) FROM donation) tableRow UNION ALL SELECT 'event_registration' tableName, (SELECT count(*) FROM event_registration) tableRow " \
                          "UNION ALL SELECT 'event_attendee' tableName, (SELECT count(*) FROM event_attendee) tableRow UNION ALL SELECT 'membership_listing' tableName, (SELECT count(*) FROM membership_listing) tableRow " \
                          "UNION ALL SELECT 'payment' tableName, (SELECT count(*) FROM payment) tableRow;"
                    self.ConnDB.exec(conn, sql)
                    sg.Popup(
                        'The size of database is more than 100M! See z_newcreate_table_row',
                        title=currentwork)
            except:
                # NOTE(review): bare except silently ignores a missing/invalid
                # b4_size config value -- and any other error in the block.
                pass

        else:
            # Plain (non-gz) dump; append COMMIT so replaying the file ends
            # the transaction.
            ret = self.ConnDB.cmd(dbname, op, sqlfile, tablesname)
            if ret == 0:
                with open(sqlfile, 'a') as (fa):
                    fa.write('COMMIT;')
            if advanced:
                sg.Popup('Complete!')