Example #1
from csv import DictReader
# Config and SteelProcessing come from the surrounding project and are not shown here

def steel_processing_batch() -> None:
    """
    All-in-one function: read rows from a file and save them to the DB.

    :return: nothing for now
    """
    print("Data extraction has been started.")
    # uncomment this line to do another test
    # SteelProcessing.query_delete_all()
    errors = []  # collect all lines from the file that can't be inserted into the DB - invalid data/format/values

    # batch file name and location
    # for simplicity, only one file with a hardcoded name is handled here;
    # a real project would usually use input/output directories, where every file in the
    # input folder is processed and then moved to the output folder
    filepath = Config.BATCH_FILE_STEEL_PROCESSING

    # open the file with data and save all rows one by one
    # the DB insertion process may be optimized by using
    # db.session.flush() after every row and db.session.commit() at the very end of the process

    with open(filepath, mode='r') as csv_file:
        try:
            # csv.DictReader reads each CSV row as a dict keyed by the header line
            csv_reader = DictReader(csv_file)
            # DictReader consumes the header itself, so iteration already starts
            # at the first data row - no extra next() call is needed
            # process data line by line
            for line in csv_reader:
                try:
                    # TODO: it would be nice to keep a list of rows that were not inserted because they
                    #  already exist
                    # TODO: in a real-world example this kind of process may be better done with an API call
                    #  and/or some kind of batch update
                    exists = SteelProcessing.query_add_by_id(
                        line["id"], line["timestamp"], line["temperature"],
                        line["duration"])
                # TODO: specify which exceptions can be handled
                except Exception as error:
                    print(repr(error))
                    errors.append(line)
        except Exception as error:
            print(repr(error))

    # write all lines with errors to the error file
    if len(errors) > 0:
        filepath = Config.get_file_batch_steel_processing_error()
        # overwrite previous content
        with open(filepath, 'w') as file_error:
            for item in errors:
                file_error.write("%s\n" % item)
    print("Data extraction has been completed.")
Example #2
from collections import namedtuple
# Config and SteelProcessing come from the surrounding project and are not shown here

def steel_processing_batch() -> None:
    """
    All-in-one function: read rows from a file and save them to the DB.

    :return: nothing for now
    """
    print("Data extraction has been started.")
    # uncomment this line to do another test
    # SteelProcessing.query_delete_all()
    errors = []  # collect all lines from the file that can't be inserted into the DB - invalid data/format/values

    # batch file name and location
    # for simplicity, only one file with a hardcoded name is handled here;
    # a real project would usually use input/output directories, where every file in the
    # input folder is processed and then moved to the output folder
    filepath = Config.BATCH_FILE_STEEL_PROCESSING

    # open the file with data and save all rows one by one
    # the DB insertion process may be optimized by using
    # db.session.flush() after every row and db.session.commit() at the very end of the process
    with open(filepath) as file_processing:
        # the first line is the header
        line = file_processing.readline()
        # build a namedtuple from the header line so the code is not sensitive to column order
        StlProc = namedtuple('StlProc', line)

        while line:
            line = file_processing.readline()
            # basic validation - just check whether the line is empty or not
            if len(line) > 0:
                # if the line cannot be inserted (data error), it will be stored in the error file
                try:
                    # strip the trailing newline so the last field does not carry '\n'
                    current_row = StlProc(*line.strip().split(','))
                    # TODO: it would be nice to keep a list of rows that were not inserted because they
                    #  already exist
                    # TODO: in a real-world example this kind of process may be better done with an API call
                    #  and/or some kind of batch update
                    exists = SteelProcessing.query_add_by_id(
                        current_row.id, current_row.timestamp,
                        current_row.temperature, current_row.duration)
                # TODO: specify which exceptions can be handled
                except Exception as error:
                    print(repr(error))
                    errors.append(line)
    # write all lines with errors to the error file
    if len(errors) > 0:
        filepath = Config.get_file_batch_steel_processing_error()
        # overwrite previous content
        with open(filepath, 'w') as file_error:
            for item in errors:
                file_error.write("%s\n" % item)
    print("Data extraction has been completed.")
Example #3
from pandas import read_csv
# Config and SteelProcessing come from the surrounding project and are not shown here

def steel_processing_batch() -> None:
    """
    All-in-one function: read rows from a file and save them to the DB.

    :return: nothing for now
    """
    print("Data extraction has been started.")
    # uncomment this line to do another test
    # SteelProcessing.query_delete_all()
    errors = []  # collect all lines from the file that can't be inserted into the DB - invalid data/format/values

    # batch file name and location
    # for simplicity, only one file with a hardcoded name is handled here;
    # a real project would usually use input/output directories, where every file in the
    # input folder is processed and then moved to the output folder
    filepath = Config.BATCH_FILE_STEEL_PROCESSING

    # open the file with data and save all rows one by one
    # the DB insertion process may be optimized by using
    # db.session.flush() after every row and db.session.commit() at the very end of the process
    try:
        # pandas in action - read_csv converts the whole CSV into a DataFrame
        # this may use a bit more memory, but it is a very good approach when
        # we need to do any data processing/analysis before saving to the DB
        steel_proc_data_frame = read_csv(filepath)
        # process data line by line
        for index, line in steel_proc_data_frame.iterrows():
            try:
                # TODO: it would be nice to keep a list of rows that were not inserted because they
                #  already exist
                # TODO: in a real-world example this kind of process may be better done with an API call
                #  and/or some kind of batch update
                exists = SteelProcessing.query_add_by_id(
                    line["id"], line["timestamp"], line["temperature"],
                    line["duration"])
            # TODO: specify which exceptions can be handled
            except Exception as error:
                print(repr(error))
                errors.append(line)
    except Exception as error:
        print(repr(error))

    # write all lines with errors to the error file
    if len(errors) > 0:
        filepath = Config.get_file_batch_steel_processing_error()
        # overwrite previous content
        with open(filepath, 'w') as file_error:
            for item in errors:
                file_error.write("%s\n" % item)
    print("Data extraction has been completed.")