def run(table):
    """Write the longest value length of each column of ``table`` to its sheet.

    Reads ``settings.mypath + table + ".txt"`` as UTF-8 text delimited by
    "│". The first row supplies the column names. For every column the
    maximum ``len()`` (in characters) of its values is written to column 5
    of the sheet row in which ``find`` locates the column name.

    Args:
        table: Base name of the data file; also passed to
            ``settings.open_gspread_connetion``.

    Raises:
        KeyError: A data row has more cells than the header row.
    """
    positions = {}  # column index -> column name
    longest = {}    # column name -> longest value seen so far (None = no values)

    # newline="" lets the csv module handle line endings itself, so quoted
    # values containing line breaks are not altered before being measured.
    with open(settings.mypath + table + ".txt", "r",
              encoding="utf-8", newline="") as f:
        reader = csv.reader(f, delimiter="│")
        for i, row in enumerate(reader):
            for j, cell in enumerate(row):
                if i == 0:
                    positions[j] = cell
                    longest[cell] = None
                    continue
                try:
                    name = positions[j]
                except KeyError as e:
                    # Row is wider than the header: show the offending cell
                    # and abort, since the file is malformed.
                    print(e)
                    print(cell)
                    raise
                # Keep only a running maximum instead of every length.
                length = len(cell)
                if longest[name] is None or length > longest[name]:
                    longest[name] = length

    # Presumably a gspread worksheet; only find() and update_cell() are used.
    google_sheet = settings.open_gspread_connetion(table)
    for key, max_length in longest.items():
        if max_length is None:
            # Nothing to measure (e.g. header-only file); this is not a
            # sheet failure, so skip without reconnecting.
            print(key + ": no values found, skipped")
            continue
        try:
            cell = google_sheet.find(key)
            google_sheet.update_cell(cell.row, 5, max_length)
            print(key + ": " + str(max_length))
        except Exception as e:
            # Best effort: report the failure, open a fresh connection and
            # carry on with the next column.
            print(e)
            traceback.print_exc()
            google_sheet = settings.open_gspread_connetion(table)
def run(table, limit):
    """Write the set of distinct values of low-cardinality columns to the sheet.

    Reads ``settings.mypath + table + ".txt"`` as "|"-delimited text. The
    first row supplies the column names. For every column, the distinct
    values that do not contain the substring "null" are collected. When the
    number of distinct values divided by the number of data rows is below
    ``limit``, the values are written as ``{a, b, c}`` to column 9 of the
    sheet row in which ``find`` locates the column name.

    Args:
        table: Base name of the data file; also passed to
            ``settings.open_gspread_connetion``.
        limit: Exclusive upper bound for distinct values / data rows.

    Raises:
        KeyError: A data row has more cells than the header row.
    """
    positions = {}  # column index -> column name
    fields = {}     # column name -> distinct values
    data_rows = 0

    # NOTE(review): this reader uses an ASCII "|" delimiter and the platform
    # default encoding - confirm that matches how the .txt export is written.
    # newline="" lets the csv module handle line endings itself.
    with open(settings.mypath + table + ".txt", "r", newline="") as f:
        reader = csv.reader(f, delimiter="|")
        # The rows come from the csv reader created above; the block
        # previously iterated an undefined `excel_sheet` object.
        for i, row in enumerate(reader):
            if i > 0 and row:
                data_rows += 1  # blank lines are not counted as data
            for j, cell in enumerate(row):
                if i == 0:
                    positions[j] = cell
                    fields[cell] = set()
                elif "null" not in cell:
                    fields[positions[j]].add(cell)

    if data_rows == 0:
        # Without data rows the ratio below is undefined.
        print(table + ": no data rows, nothing to update")
        return

    # Presumably a gspread worksheet; only find() and update_cell() are used.
    google_sheet = settings.open_gspread_connetion(table)
    for key, values in fields.items():
        if len(values) / data_rows < limit:
            try:
                cell = google_sheet.find(key)
                # Sorted so the written text is stable between runs.
                cardinality = '{' + ', '.join(sorted(values)) + '}'
                google_sheet.update_cell(cell.row, 9, cardinality)
                print(key + ': ' + cardinality)
            except Exception as e:
                # Best effort: report the failure, open a fresh connection
                # and carry on with the next column.
                print(e)
                traceback.print_exc()
                google_sheet = settings.open_gspread_connetion(table)
def run(table, limit_percent_null):
    """Flag columns of ``table`` whose values are mostly empty or "null".

    Reads ``settings.mypath + table + ".txt"`` as UTF-8 text delimited by
    "│". The first row supplies the column names. A value counts as null
    when it is empty or contains the substring "null". For every column the
    fraction of null values (0..1) is printed. When that fraction is
    greater than ``limit_percent_null``, a note is written to column 1 of
    the sheet row in which ``find`` locates the column name.

    Args:
        table: Base name of the data file; also passed to
            ``settings.open_gspread_connetion``.
        limit_percent_null: Exclusive threshold, compared against the
            0..1 fraction of null values.

    Raises:
        KeyError: A data row has more cells than the header row.
    """
    positions = {}    # column index -> column name
    null_counts = {}  # column name -> number of empty/"null" values
    totals = {}       # column name -> number of values seen

    # newline="" lets the csv module handle line endings itself.
    with open(settings.mypath + table + ".txt", "r",
              encoding="utf-8", newline="") as f:
        reader = csv.reader(f, delimiter="│")
        for i, row in enumerate(reader):
            for j, cell in enumerate(row):
                if i == 0:
                    positions[j] = cell
                    null_counts[cell] = 0
                    totals[cell] = 0
                    continue
                name = positions[j]
                totals[name] += 1
                if not cell or "null" in cell:
                    null_counts[name] += 1

    # Presumably a gspread worksheet; only find() and update_cell() are used.
    google_sheet = settings.open_gspread_connetion(table)
    for key, total in totals.items():
        if total == 0:
            # No values for this column (e.g. header-only file): the
            # fraction is undefined, so skip instead of dividing by zero.
            print("{0}: no values found, skipped".format(key))
            continue
        percent_null = null_counts[key] / total
        print("{0}: {1}".format(key, percent_null))
        # Only columns above the threshold get a note in the sheet.
        if percent_null > limit_percent_null:
            try:
                cell = google_sheet.find(key)
                # NOTE(review): the value written is the 0..1 fraction
                # although the text says "percent" - confirm before changing.
                google_sheet.update_cell(
                    cell.row, 1,
                    "{0:.2f} percent values are null".format(percent_null))
            except Exception as e:
                # Best effort: report the failure, open a fresh connection
                # and carry on with the next column.
                print(e)
                traceback.print_exc()
                google_sheet = settings.open_gspread_connetion(table)
def match(ratio):
    """Suggest similarly named Salesforce fields for every column of each table.

    For each name in ``settings.tables`` the field list comes from
    ``get_salesforce_fields(table.upper())`` and the column names come from
    the first row of ``../data/xlsx/<table>.csv``. Each column name is
    compared with each Salesforce field name (with every "__c" removed)
    using ``SequenceMatcher`` with "_" treated as junk. Fields scoring
    strictly above ``ratio`` are written as ``{a, b}`` to column 6 of the
    sheet row in which ``find`` locates the column name.

    Tables whose field lookup raises are reported and skipped.

    Args:
        ratio: Exclusive similarity threshold for ``SequenceMatcher.ratio()``.
    """
    def is_underscore(char):
        # Junk predicate handed to SequenceMatcher.
        return char == "_"

    for table in settings.tables:
        try:
            candidates = get_salesforce_fields(table.upper())
        except Exception as err:
            print(err)
            continue

        # Only the header row of the export is needed.
        source_dir = "../data/xlsx/"
        with open(source_dir + table + ".csv", "r") as handle:
            header = next(csv.reader(handle, delimiter=","), [])

        matches = {}
        for column in header:
            hits = matches[column] = []
            for candidate in candidates:
                bare_name = candidate.replace('__c', '')
                score = SequenceMatcher(is_underscore, column, bare_name).ratio()
                if score > ratio:
                    print("{{{0}, {1}}}: {2}".format(column, bare_name, score))
                    hits.append(candidate)

        sheet = settings.open_gspread_connetion(table)
        for column, hits in matches.items():
            try:
                located = sheet.find(column)
                rendered = '{' + ', '.join(str(hit) for hit in hits) + '}'
                sheet.update_cell(located.row, 6, rendered)
                print(column + ": " + rendered)
            except Exception as err:
                # Report the failure, open a fresh connection and move on
                # to the next column.
                print(err)
                traceback.print_exc()
                sheet = settings.open_gspread_connetion(table)
def run(table):
    """Generate a ``CREATE TABLE`` script for ``table`` from its sheet metadata.

    Column names are the non-empty cells of the first row of
    ``settings.mypath + table + ".txt"`` (UTF-8, delimited by "│"). For
    each column, the data type is read from column 4 and the size from
    column 5 of the sheet row in which ``find`` locates the column name.
    Types containing "varchar" get ``(roundup(size))`` appended. The script
    is written to ``../../sql scripts/<table>.sql``.

    Args:
        table: Base name of the data file and of the generated script; also
            used as the SQL table name and passed to
            ``settings.open_gspread_connetion``.
    """
    # Only the header row is needed, so stop after the first record instead
    # of scanning the whole data file. newline="" lets the csv module handle
    # line endings itself.
    with open(settings.mypath + table + ".txt", "r",
              encoding="utf-8", newline="") as f:
        header = next(csv.reader(f, delimiter="│"), [])
    fields = [name for name in header if name]

    # Presumably a gspread worksheet; only find() and cell() are used.
    google_sheet = settings.open_gspread_connetion(table)
    last_index = len(fields) - 1
    with open("../../sql scripts/" + table + ".sql", 'w') as sql:
        sql.write("CREATE TABLE " + table + "\n")
        sql.write("(" + "\n")
        for index, field in enumerate(fields):
            print(field)
            cell = google_sheet.find(field)
            data_type = google_sheet.cell(cell.row, 4).value
            size = google_sheet.cell(cell.row, 5).value
            sql.write(field + ' ' + data_type)
            if "varchar" in data_type:
                sql.write('(' + str(roundup(size)) + ')')
            # Decide by position, not by name: a column whose name equals
            # the last column's name must still get its separator.
            if index != last_index:
                sql.write(",")
            sql.write("\n")
        sql.write(");")