def get_max_datetime(self, table_name):
    """Return the latest value of the date-check column already stored in DB.

    Parameters:
        table_name: name of the table to inspect.

    Returns:
        The maximum datetime found in ``self.column_check_date``, or ``None``
        when the table is empty or the query/parsing fails.
    """
    db = get_db()
    try:
        cursor = db.cursor()
        # NOTE(review): table/column names are interpolated into SQL; they
        # appear to come from config, not user input — confirm they are trusted.
        cursor.execute('select {} from {}'.format(self.column_check_date, table_name))
        rows = cursor.fetchall()
        dates = [self.__get_datetime(row[0]) for row in rows]
        # An empty table would make max() raise ValueError; treat it as
        # "no data yet" instead of logging a spurious error.
        return max(dates) if dates else None
    except Exception as e:
        print("Exception while generating max date :", e)
        Logger.log('error', "Exception while generating max date : {}.".format(e))
        return None
    finally:
        # Close the connection on every exit path (the original leaked it
        # whenever fetch/parse raised before the explicit close).
        try:
            db.close()
        except Exception:
            pass
def __alter_table_collumn_add(self, db, table_name, new_collumns):
    """Add new columns to an existing table; each is created as varchar(500).

    Parameters:
        db: open database connection object.
        table_name: name of the table to alter.
        new_collumns: iterable of column names to add.
    """
    # One cursor for the whole loop (original created a fresh cursor per column).
    cursor = db.cursor()
    for column in new_collumns:
        print("adding new column '{}' to '{}'".format(column, table_name))
        Logger.log(
            'info',
            "adding new column '{}' to '{}'".format(column, table_name))
        # All dynamically-added columns use varchar(500) because row values
        # are stringified before insertion (see insert_db).
        cursor.execute('ALTER TABLE {} ADD {} varchar(500)'.format(table_name, column))
        print("column added.")
        Logger.log('info', "column added.")
from feat_extract import FeatureExtractor
from helpers import Logger

# Build parallel lists of labels and feature vectors for every image found
# in the generated-output directory.
labels = []
vectors = []
imgPath = "./data_gen/output/"
l = Logger()
# Only regular files directly inside imgPath are treated as images.
images = [f for f in os.listdir(imgPath) if os.path.isfile(os.path.join(imgPath, f))]
total = len(images)
time_taken = 0
for i, image_name in enumerate(images):
    # ANSI escape sequence: clear the screen and home the cursor before
    # printing each progress line.
    l.log("\033[2J\033[0;0H")
    l.log(f"Image {i+1} of {total}")
    filename = os.path.join(imgPath, image_name)
    l.log(f"reading (unknown)")
    # Read as grayscale (flag 0), then append a channel axis: (H, W) -> (H, W, 1).
    img = cv2.imread(filename, 0)
    img = np.reshape(img, (*img.shape, 1))
    # The image's file name doubles as its label.
    labels.append(image_name)
    l.log("extracting features...")
    st = time.time()
    # NOTE(review): this statement is cut off at the chunk boundary — the
    # FeatureExtractor(...) call continues past the visible source.
    vectors.append(
        FeatureExtractor(
def insert_db(self, data, table_name, data_updated_till, id_prefix):
    """Insert API rows into a table, adding missing columns and generated ids.

    Parameters:
        data: iterator over API rows; its first item yields the header row.
        table_name: destination table.
        data_updated_till: datetime up to which the table is already current
            (only rows strictly newer are inserted), or None to insert all.
        id_prefix: prefix used when generating the per-row id.
    """
    print("inside insert_db method")
    db = get_db()
    try:
        # First item of the API stream is the header; normalize to snake_case.
        # (Original used py2-only data.next(); next(data) works on py3.)
        fieldnames_api = next(data)
        fieldnames_api = [
            item.lower().replace(" : ", "_").replace(" ", "_").replace("-", "_")
            for item in fieldnames_api
        ]
        try:
            column_check_date_index = fieldnames_api.index(self.column_check_date)
        except ValueError:
            # Without the date column we cannot filter by recency; insert everything.
            message = (
                "{} not found in API response, GOING TO INSERT ALL DATA TO DATABASE."
                .format(' '.join(self.column_check_date.split('_'))))
            print("WARNING !! " + message)
            Logger.log('warning', message)
            column_check_date_index = None

        # Columns already present in our table; the first result row
        # (presumably the id column) is deliberately skipped.
        cursor = db.cursor()
        cursor.execute("show columns from {}".format(table_name))
        fieldnames_db = [row[0] for row in cursor.fetchall()[1:]]

        # Any API column missing from the table is added as varchar(500).
        new_columns = list(set(fieldnames_api) - set(fieldnames_db))
        if new_columns:
            print("found new column(s).")
            Logger.log('info', "found new column(s).")
            try:
                self.__alter_table_collumn_add(db, table_name, new_columns)
            except Exception as e:
                print("Exception during alter table :", e)
                Logger.log('error', "Exception during alter table : {}".format(e))
                return None

        # Placeholders: API columns + generated id (+ last_modified for
        # inventory_master_db, which carries an extra timestamp column).
        extra = 2 if table_name == "inventory_master_db" else 1
        fields = ", ".join(["%s"] * (len(fieldnames_api) + extra))

        max_id = int(self.__get_current_max_id(db, id_prefix, table_name))

        column_list = ", ".join(fieldnames_api + [self.nmk_id_field])
        if table_name == "inventory_master_db":
            column_list += ", last_modified"
        query = "insert into {} ({}) values ({})".format(table_name, column_list, fields)
        cursor = db.cursor()

        # Append a generated id to each row; when both a date column and a
        # cutoff are available, keep only rows strictly newer than the cutoff.
        filtering = (column_check_date_index is not None) and (data_updated_till is not None)
        final_data = []
        for row in data:
            row = [str(item) for item in row]
            if filtering:
                try:
                    row_date = self.__get_datetime(row[column_check_date_index])
                except Exception:
                    # Unparseable/missing date: skip the row entirely.
                    continue
                if not (data_updated_till < row_date):
                    continue
            max_id += 1
            final_data.append(self.__append_id(id_prefix, row, max_id, table_name))

        if filtering:
            print("Number of new row(s) found : {}".format(len(final_data)))
            Logger.log(
                'info',
                "Number of new row(s) found : {}".format(len(final_data)))

        # Bulk-insert everything in one executemany + commit.
        if final_data:
            try:
                print("inserting data into table '{}'".format(table_name))
                Logger.log('info', "inserting data into table '{}'".format(table_name))
                row_count = cursor.executemany(query, final_data)
                db.commit()
                print("Number of row(s) inserted : {}".format(row_count))
                Logger.log('info', "Number of row(s) inserted : {}".format(row_count))
            except Exception as e:
                print("Database insertion exception :", e)
                Logger.log('error', "Database insertion exception : {}".format(e))
    finally:
        # Close the connection on every exit path (the original leaked it on
        # the alter-table error return and on any mid-method exception).
        db.close()
def my_main():
    """Sync every configured table from the API into the database.

    For each table from config: fetch the API data, work out the date up to
    which the table is already populated, then insert only newer rows.
    Per-table failures are logged and skipped; always returns 'Success'.
    """
    Logger()
    for item in get_tables():
        try:
            # Small delay between tables to avoid hammering the API.
            time.sleep(5)
            search_id = item['search_id']
            table_name = item['table']
            print("Search id :", search_id)
            Logger.log('info', "Search id : {}".format(search_id))
            obj = Data(search_id, table_name)
            print("fetching data from API.")
            Logger.log('info', "fetching data from API.")
            data = obj.read_data()
            print(data)
            if data is None:
                print("No response from API")
                Logger.log('warning', "No response from API.")
            else:
                print("data fetched from API.")
                Logger.log('info', "data fetched from API.")
                if item.get('truncate'):
                    obj.truncate_table(table_name)
                    print("table '{}' truncated.".format(table_name))
                    Logger.log('info', "table '{}' truncated.".format(table_name))
                print("Calculating date till which data is updated.")
                Logger.log('info', "Calculating date till which data is updated.")
                data_updated_till = obj.get_max_datetime(table_name)
                if data_updated_till:
                    print("data updated till : {}".format(data_updated_till))
                    Logger.log('info', "data updated till : {}".format(data_updated_till))
                else:
                    print("WARNING !! Unable to find max date, GOING TO INSERT ALL DATA TO DATABASE.")
                    Logger.log(
                        'warning',
                        "Unable to find max date, GOING TO INSERT ALL DATA TO DATABASE."
                    )
                obj.insert_db(data, table_name, data_updated_till, item['id_prefix'])
                print("Done ...!!")
        except Exception as e:
            print("Exception :", e)
            Logger.log('error', "Exception : {}".format(e))
            traceback.print_exc()
        # Visual separator between tables (assumed to run once per loop
        # iteration — the mangled source makes the exact indent ambiguous).
        print("\n\n")
    return 'Success'