def steel_processing_batch() -> None:
    """Read steel-processing rows from the batch CSV file and save them in the DB.

    Rows that cannot be inserted (invalid data/format/values, DB errors) are
    collected and written to the error file, overwriting its previous content.

    :return: nothing for now
    """
    print("Data extraction has been started.")
    # uncomment this line to do another test
    # SteelProcessing.query_delete_all()

    # all lines from the file which can't be inserted in the DB - invalid data/format/values
    errors = []

    # batch file name and location
    # only for simplicity - only one file with a hardcoded name can be handled
    # for a real project usually do it with input/output dirs, where all files in the
    # input folder should be processed and moved to the output folder
    filepath = Config.BATCH_FILE_STEEL_PROCESSING

    # open file with data and save all rows one by one
    # the process of db insertion may be optimized by using
    # db.session.flush() after every row and db.session.commit() at the very end of the process
    with open(filepath, mode='r') as csv_file:
        try:
            # DictReader consumes the header line itself and yields each data row
            # as a mapping keyed by the column names.
            # BUG FIX: the previous explicit next(csv_reader, None) call skipped
            # the FIRST DATA ROW (DictReader had already eaten the header), so one
            # record per batch was silently lost - it has been removed.
            csv_reader = DictReader(csv_file)

            # process data line by line
            for line in csv_reader:
                try:
                    # TODO: its nice to have something to keep list row which has not been
                    # inserted because it already exists
                    # TODO: in real world example this kind of process may be better to do
                    # with api call or/and some kind of batch update
                    exists = SteelProcessing.query_add_by_id(
                        line["id"], line["timestamp"],
                        line["temperature"], line["duration"])
                # TODO: specify which exceptions can be handled
                except Exception as error:
                    print(repr(error))
                    errors.append(line)
        except Exception as error:
            print(repr(error))

    # put all lines with errors in the error file (overwrite previous content)
    if len(errors) > 0:
        filepath = Config.get_file_batch_steel_processing_error()
        with open(filepath, 'w') as file_error:
            for item in errors:
                file_error.write("%s\n" % item)

    print("Data extraction has been completed.")
def steel_processing_batch() -> None:
    """Read steel-processing rows from the batch file and save them in the DB.

    Uses a namedtuple built from the header line so the code is not sensitive
    to column order. Rows that cannot be inserted are collected and written to
    the error file, overwriting its previous content.

    :return: nothing for now
    """
    print("Data extraction has been started.")
    # uncomment this line to do another test
    # SteelProcessing.query_delete_all()

    # all lines from the file which can't be inserted in the DB - invalid data/format/values
    errors = []

    # batch file name and location
    # only for simplicity - only one file with a hardcoded name can be handled
    # for a real project usually do it with input/output dirs, where all files in the
    # input folder should be processed and moved to the output folder
    filepath = Config.BATCH_FILE_STEEL_PROCESSING

    # open file with data and save all rows one by one
    # the process of db insertion may be optimized by using
    # db.session.flush() after every row and db.session.commit() at the very end of the process
    with open(filepath) as file_processing:
        # the first line is the header
        line = file_processing.readline()
        # use a namedtuple so field access is not sensitive to column order
        # (namedtuple accepts a comma/whitespace separated field string)
        StlProc = namedtuple('StlProc', line)
        while line:
            line = file_processing.readline()
            # basic validation - just check whether the line is empty or not
            if len(line) > 0:
                # if a line can not be inserted (data error) it is stored in the error file
                try:
                    # BUG FIX: strip the trailing newline before splitting - previously
                    # the last column (duration) carried a '\n' into the DB insert, and
                    # raw lines appended to `errors` produced doubled newlines in the
                    # error file.
                    current_row = StlProc(*line.rstrip('\n').split(','))
                    # TODO: its nice to have something to keep list row which has not been
                    # inserted because it already exists
                    # TODO: in real world example this kind of process may be better to do
                    # with api call or/and some kind of batch update
                    exists = SteelProcessing.query_add_by_id(current_row.id,
                                                             current_row.timestamp,
                                                             current_row.temperature,
                                                             current_row.duration)
                # TODO: specify which exceptions can be handled
                except Exception as error:
                    print(repr(error))
                    errors.append(line.rstrip('\n'))

    # put all lines with errors in the error file (overwrite previous content)
    if len(errors) > 0:
        filepath = Config.get_file_batch_steel_processing_error()
        with open(filepath, 'w') as file_error:
            for item in errors:
                file_error.write("%s\n" % item)

    print("Data extraction has been completed.")
def steel_processing_batch() -> None:
    """Read steel-processing rows from the batch CSV via pandas and save them in the DB.

    Loads the whole CSV into a DataFrame (slightly memory-heavy, but convenient
    if any data processing/analysis is needed before persisting). Rows that
    cannot be inserted are collected and written to the error file, overwriting
    its previous content.

    :return: nothing for now
    """
    print("Data extraction has been started.")
    # uncomment this line to do another test
    # SteelProcessing.query_delete_all()

    # rows which can't be inserted into the DB - invalid data/format/values
    failed_rows = []

    # batch file name and location
    # only for simplicity - a single hardcoded file name is handled here;
    # a real project would use input/output directories instead
    source_path = Config.BATCH_FILE_STEEL_PROCESSING

    # insertion could be optimized with db.session.flush() per row and a single
    # db.session.commit() at the very end of the process
    try:
        # pandas converts the whole csv into a DataFrame in one shot
        frame = read_csv(source_path)

        # iterate the DataFrame row by row
        for _, row in frame.iterrows():
            try:
                # TODO: its nice to have something to keep list row which has not been
                # inserted because it already exists
                # TODO: in real world example this kind of process may be better to do
                # with api call or/and some kind of batch update
                exists = SteelProcessing.query_add_by_id(
                    row["id"], row["timestamp"],
                    row["temperature"], row["duration"])
            # TODO: specify which exceptions can be handled
            except Exception as error:
                print(repr(error))
                failed_rows.append(row)
    except Exception as error:
        print(repr(error))

    # dump all failed rows into the error file (overwrite previous content)
    if len(failed_rows) > 0:
        error_path = Config.get_file_batch_steel_processing_error()
        with open(error_path, 'w') as file_error:
            for item in failed_rows:
                file_error.write("%s\n" % item)

    print("Data extraction has been completed.")