Exemple #1
0
 def get(self, page=1, name=None, date=None):
     """Look up one employee, list active employees, or page through all.

     * ``name`` truthy: return whatever ``find_one({'name': name})`` yields.
     * ``page == 'all'``: return a name-sorted list of summary dicts
       (``name``, ``employeeId``, ``email``, ``regular``, ``status``).
       Records whose status is '퇴사' are left out.  A record that carries
       an ``endDate`` is kept only when ``date`` is truthy and
       ``date <= endDate``.
     * otherwise: pass the department/name-sorted cursor to
       ``Page(page).paginate`` and return its result.
     """
     if name:
         return self.collection.find_one({'name': name})

     if page == 'all':
         roster = []
         for doc in self.collection.find(sort=[('name', 1)]):
             # Missing and falsy values are both reported as None.
             status = doc.get('status') or None
             # Only employees who have not left the company are included.
             if status == '퇴사':
                 continue
             # A record with an end date needs a reference date that does
             # not fall after it; without a reference date it is dropped.
             if 'endDate' in doc and not (date and date <= doc['endDate']):
                 continue
             roster.append({
                 'name': doc['name'],
                 'employeeId': doc['employeeId'],
                 'email': doc.get('email') or None,
                 'regular': doc.get('regular') != '비상근',
                 'status': status,
             })
         return roster

     cursor = self.collection.find(sort=[('department', 1), ('name', 1)])
     return Page(page).paginate(cursor)
Exemple #2
0
def handle_missing_page_id(aligned_page: Page, original_page: Page,
                           list_id: str, id_bounding_box: BoundingBox,
                           page_number: int) -> Tuple[bool, Page]:
    """Decide which page to use when the expected list ID was not found.

    Returns ``(True, original_page)`` when the ID read from the original
    page equals ``list_id`` or when the reviewer presses ``y``; returns
    ``(False, aligned_page)`` when the reviewer presses ``n``.  Any other
    key re-displays the prompt.
    """
    # If the raw page shows the ID, the mismatch is treated as a homography
    # problem and the uncorrected page is used.
    if original_page.get_list_id(id_bounding_box) == list_id:
        print('Homography error on page {}, using uncorrected page instead.'.
              format(page_number))
        return True, original_page

    # Not found on the raw page either: show the ID region to the user,
    # with a top border added above it, and ask for confirmation.
    review_image = original_page.get_roi(id_bounding_box).add_border(top=30)
    prompt = "ID: {}? (y|n)".format(list_id)

    accept_key, reject_key = ord("y"), ord("n")
    while True:
        pressed = review_image.show(title="review",
                                    message=prompt,
                                    resize=False)
        if pressed not in (accept_key, reject_key):
            continue  # ignore every other key and ask again

        cv2.destroyAllWindows()
        if pressed == accept_key:
            return True, original_page
        return False, aligned_page
Exemple #3
0
def generate_error_pages(error_images: List[Image], skipped_pages: List[Page],
                         list_id: str) -> None:
    """Stack the error images onto pages and save them with the skipped pages.

    The error images are placed top to bottom on a blank page; as many as
    fit (judged by the height of the first image) go on each page.  The
    resulting pages are appended after ``skipped_pages`` and everything is
    passed to ``save_error_pages`` together with ``list_id``.

    ``skipped_pages`` itself is left untouched: the pages are collected in
    a copy so the caller's list does not grow as a side effect.  With no
    error images, only the skipped pages (if any) are saved.
    """
    # Copy instead of aliasing so the caller's list is not mutated.
    error_pages = list(skipped_pages)

    if not error_images:
        # Nothing to lay out; there is no first image to size the rows by.
        if error_pages:
            save_error_pages(error_pages, list_id)
        return

    page = utils.make_blank_page()
    page.crop(top=int(2 * utils.MARGIN),
              right=int(2 * utils.MARGIN))  # to account for printing

    # Number of error images that fit on a page; never less than one so an
    # image taller than the page cannot cause a modulo-by-zero below.
    num_images_per_page = max(
        1, math.floor(page.size.h / error_images[0].size.h))

    for i, error_image in enumerate(error_images):
        images_on_page = i % num_images_per_page

        # A full page has been filled: store it before continuing.
        if images_on_page == 0 and i > 0:
            # NOTE(review): the return value of add_border is discarded here,
            # and `page` is not reset to a blank page afterwards - confirm
            # both are intended.
            page.add_border(top=int(2 * utils.MARGIN),
                            right=int(2 * utils.MARGIN))
            error_pages.append(page)

        # Place the image in the next free row of the page.
        insert_point = Point(0, images_on_page * error_image.size.h)
        page = Page(page.insert_image(error_image, insert_point))

    # Store the last (possibly partially filled) page.
    page.add_border(top=int(2 * utils.MARGIN), right=int(2 * utils.MARGIN))
    error_pages.append(page)

    # Save out a PDF.
    save_error_pages(error_pages, list_id)
Exemple #4
0
 def get_device_list(self, page=1, date=None):
     """Return the distinct ``mac`` values recorded late on ``date``.

     Matches documents for ``date`` whose ``time`` is greater than
     ``WORKING['time']['overNight']`` and groups them by ``mac``.  With
     ``page == 'all'`` the plain list is returned; otherwise it is handed
     to ``Page(page).paginate``.
     """
     pipeline = [
         {'$match': {'date': date,
                     'time': {"$gt": WORKING['time']['overNight']}}},
         {'$group': {'_id': '$mac'}},
     ]
     grouped = self.collection.aggregate(pipeline)

     # Flatten every value of every grouped row into one list.
     device_list = [row[key] for row in grouped for key in row]

     if page == 'all':
         return device_list
     return Page(page).paginate(device_list)
Exemple #5
0
 def get(self, page=1, date=None):
     """Return devices, either unfiltered or filtered by ``endDate``.

     With ``page == 'all'`` and no ``date`` the whole collection is
     returned.  Otherwise ``date`` (defaulting to the third value returned
     by ``check_time()``) is passed through
     ``get_date_several_months_before(date, delta=2)`` and only documents
     whose ``endDate`` is greater than the result are selected.  The
     result is returned as-is for ``page == 'all'`` and paginated through
     ``Page(page)`` otherwise.
     """
     everything = page == 'all'

     if everything and date is None:
         devices = self.collection.find()
     else:
         if date is None:
             _, _, date, _ = check_time()
         cutoff = get_date_several_months_before(date, delta=2)
         devices = self.collection.find({'endDate': {"$gt": cutoff}})

     return devices if everything else Page(page).paginate(devices)
Exemple #6
0
def markup_response_codes(page: Page, list_id: str,
                          line_number: int) -> List[ResponseCode]:
    """Interactively collect the response codes on one line of a page.

    For each code the user marks a bounding box on the line's region, the
    text found in it is reduced to alphanumeric characters, and the user
    confirms or retypes the value.  After each code the user chooses to
    stay on the same survey question (enter), move to the next one ('n')
    or finish ('q').  Returns the collected ``ResponseCode`` objects in
    the order they were marked.
    """
    # Region of the page holding the requested line.
    list_dir = utils.get_list_dir(list_id)
    line_bb = page.get_line_bb(line_number,
                               list_dir,
                               padding=100,
                               right_half=True)
    markup_roi = page.get_roi(line_bb)

    collected = []
    question_number = 1
    while True:
        print("Please mark each response code in survey question %d." %
              question_number)

        marked_box = markup_roi.markup()
        raw_text = markup_roi.find_text(marked_box)
        # sometimes OCR picks up stray symbols, get rid of them.
        cleaned = ''.join(filter(str.isalnum, raw_text))

        # Re-express the box relative to the line's top-left corner.
        marked_box = marked_box.update_coordinate_system(line_bb.top_left)
        page.get_roi(marked_box)  # result unused; call kept from the original
        code = ResponseCode(marked_box, question_number, cleaned)

        print("Extracted scan code: \"%s\"" % code.value)

        # Anything other than "y" is treated as "no" and asks for a new value.
        while True:
            print("Is this correct? [y|n]")
            if input().lower() == "y":
                break
            print("Please enter the correct response code: ")
            code.value = input()

        collected.append(code)

        print(
            r"""Hit enter (no input) to mark another response in the same survey question.
            Enter 'n' to move to the next survey question. Enter 'q' to finish. [enter|n|q]"""
        )
        choice = input().lower()
        if choice == "q":
            break
        if choice == "n":
            question_number += 1

    return collected
Exemple #7
0
    def summary(self, page=1, start=None, end=None):
        """Build per-name attendance totals for a date range and paginate them.

        When both ``start`` and ``end`` are given, records with
        ``start <= date <= end`` are read (sorted by name, newest first)
        and those with non-None ``workingHours`` are tallied per name:
        day count, working hours, first status entry, and reason (only
        when the reason is already a key of that name's tally).  Each
        tally goes through ``self.get_summary``.  Without a full range
        the list stays empty.  The list is returned via
        ``Page(page).paginate``.
        """
        summary_list = []

        data_list = None
        if start and end:
            date_range = {'date': {"$gte": start, "$lte": end}}
            data_list = self.collection.find(
                date_range, sort=[('name', 1), ('date', -1)])

        if data_list:
            per_name = OrderedDict()
            for data in data_list:
                hours = data['workingHours']
                if hours is None:
                    continue

                name = data['name']
                tally = per_name.setdefault(name, {'name': name})

                # Seed every counter that does not exist yet with zero.
                for key in ('totalDay', 'totalWorkingDay',
                            'totalWorkingHours'):
                    tally.setdefault(key, 0)
                for status in WORKING['inStatus']:
                    tally.setdefault(status, 0)
                for status in WORKING['status']:
                    tally.setdefault(status, 0)

                tally['totalDay'] += 1

                if 'status' in data:
                    first_status = data['status'][0]
                    if first_status:
                        tally[first_status] += 1

                # Reasons are only counted when a counter for them exists.
                reason = data.get('reason')
                if reason and reason in tally:
                    tally[reason] += 1

                tally['totalWorkingHours'] += hours

            summary_list.extend(
                self.get_summary(tally) for tally in per_name.values())

        return Page(page).paginate(summary_list)
Exemple #8
0
def get_temp_page(pages, page_number: int, rotate_dir: Rotation,
                  temp_filename: str) -> Page:
    """Save one page as a JPEG in the temp directory and reload it as a Page.

    ``page_number`` is 1-based: page ``n`` is ``pages[n - 1]``.  The file
    is written to ``utils.TEMP_DIR`` followed directly by
    ``temp_filename`` and then read back with ``Page.from_file`` using
    ``rotate_dir``.
    """
    temp_path = f"{utils.TEMP_DIR}{temp_filename}"

    # Save the requested page out to temp so it can be read in again.
    selected = pages[page_number - 1]
    selected.save(temp_path, 'JPEG')

    return Page.from_file(temp_path, rotate_dir)
Exemple #9
0
def scan_page(list_id: str, rotate_dir: Rotation, args, page_number: int,
              ref_page: Page, ref_bounding_boxes: Dict[str, BoundingBox],
              list_dir: str, results_scans, results_stats, results_errors,
              previous_scans: dict, backup_writer):
    """Scan every barcode on one page and fold the results into the totals.

    The page is loaded, aligned to ``ref_page`` and checked for the
    expected list ID.  A page whose ID cannot be confirmed, or on which
    no barcode is decoded, is recorded under
    ``results_errors['skipped_pages']`` as ``{page_number: page}`` and
    skipped.  Otherwise each decoded barcode is passed to
    ``scan_barcode`` and the number of distinct voter IDs is checked with
    ``check_num_barcodes``.

    Returns the ``(results_scans, results_stats, results_errors)`` triple.
    """
    page_path = utils.get_page_filename(list_id, page_number)
    page = Page.from_file(page_path, rotate_dir)
    response_codes = utils.load_response_codes(list_id)

    # Align the page to the reference.
    aligned_page = page.align_to(ref_page)
    if utils.__DEBUG__:
        aligned_page.show(title="aligned page")

    # Confirm the page has the correct list_id.
    # NOTE(review): the ID is read from the unaligned page, and the aligned
    # page is only used by handle_missing_page_id - confirm this is intended.
    id_box = ref_bounding_boxes["list_id"]
    page_list_id = page.get_list_id(id_box)
    if page_list_id != list_id:
        valid_id, page = handle_missing_page_id(aligned_page, page, list_id,
                                                id_box, page_number)
        if not valid_id:
            print(
                'Error: Page {} has ID {}, but active ID is {}. Page {} has been skipped.'
                .format(page_number + 1, page_list_id, list_id,
                        page_number + 1))
            results_errors['skipped_pages'].append({page_number: page})
            return results_scans, results_stats, results_errors

    # Decode the barcodes; the scanner needs the unthresholded image.
    barcodes = pyzbar.decode(page.raw_image)
    if len(barcodes) == 0:
        print('Error: Cannot find barcodes. Page {} has been skipped.'.format(
            page_number + 1))
        results_errors['skipped_pages'].append({page_number: page})
        return results_scans, results_stats, results_errors

    # Process each detected barcode; voter_ids collects the IDs seen so far.
    voter_ids: Set = set()
    for detected in barcodes:
        results_scans, results_stats, results_errors = scan_barcode(
            detected, page, ref_bounding_boxes, list_dir, response_codes,
            args, results_scans, results_stats, results_errors,
            previous_scans, backup_writer, voter_ids)
    check_num_barcodes(page, list_dir, len(voter_ids), results_stats)

    if utils.__DEBUG__:
        page.show()

    return results_scans, results_stats, results_errors
Exemple #10
0
def check_num_barcodes(page: Page, list_dir: str, num_scanned_barcodes: int,
                       results_stats) -> None:
    """Count barcodes by pixel density and record how many were missed.

    Lines 1..``utils.MAX_BARCODES_ON_PAGE`` are inspected in order; for
    each, the 700-pixel-wide strip ending at the right edge of the line's
    bounding box is inverted and its white pixels counted.  Counting stops
    at the first line that does not exceed the threshold.  If more
    barcodes are counted than were scanned, the difference is added to
    ``results_stats["num_missed_barcodes"]``; if fewer, a warning is
    printed.
    """
    # If a barcode exists in the area it averages 29k black pixels.
    threshold = 20000

    num_actual_barcodes = 0
    for line_number in range(1, utils.MAX_BARCODES_ON_PAGE + 1):
        line_bb = page.get_line_bb(line_number, list_dir)
        # Narrow the box to the barcode portion at the right-hand end.
        line_bb.top_left.x = line_bb.bottom_right.x - 700
        inverted_strip = page.get_roi(line_bb).invert()

        if inverted_strip.numWhitePixels() > threshold:
            num_actual_barcodes += 1
            continue
        break  # we have likely reached the end of the page.

    shortfall = num_actual_barcodes - num_scanned_barcodes
    if shortfall < 0:
        print(
            "Something went wrong with the image alignment! Cannot accurately count missed barcodes."
        )
    elif shortfall > 0:
        results_stats["num_missed_barcodes"] += shortfall
Exemple #11
0
 def get(self, page=1, start=None, end=None):
     """Return one page of the collection, sorted by ``create_time`` descending.

     ``start`` and ``end`` are accepted but not used by this method.
     """
     newest_first = [('create_time', -1)]
     cursor = self.collection.find(sort=newest_first)
     return Page(page).paginate(cursor)
Exemple #12
0
def main() -> None:
    """Scan every page of a walklist and write out the results.

    Reads the command-line arguments, loads the reference page and
    bounding boxes for the list, scans each page from ``start_page``
    onward while writing a CSV backup row per result, writes the final
    results CSV, prints the skipped pages, and then either runs the test
    suite (when ``test_file`` is set) or prints the statistics.
    """
    args = parse_args()
    list_id = args["list_id"]
    check_files_exist(list_id)
    list_dir: str = utils.get_list_dir(list_id)
    rotate_dir = utils.map_rotation(args["rotate_dir"])

    ref_bounding_boxes = utils.load_ref_boxes(list_dir)
    ref_page = Page.from_file(list_dir + utils.CLEAN_IMAGE_FILENAME,
                              rotate_dir)

    # init results object
    results_scans: list = []

    # things to track for error reporting
    results_stats = {
        'num_scanned_barcodes': 0,
        'num_missed_barcodes': 0,
        'num_error_barcodes': 0,
        'incorrect_scans': [],
    }

    # stuff to build error PDF for human scanning
    results_errors: dict = {
        'errors_for_human': [],
        'skipped_pages': [],
    }

    # Previous scans are loaded BEFORE the backup file is reopened in write
    # mode below, because opening it with mode='w' truncates it.
    backup_filename, colnames = prep_backup_csv(list_dir, list_id)
    previous_scans = load_previous_scans(backup_filename, args)

    # newline='' is what the csv module requires for files it writes to;
    # without it every row gets an extra '\r' on platforms that translate
    # '\n' on write.
    with open(backup_filename, mode='w', newline='') as backup_csv:
        backup_writer = csv.DictWriter(backup_csv, fieldnames=colnames)
        backup_writer.writeheader()

        num_pages = len(
            os.listdir("{}/{}".format(list_dir, utils.WALKLIST_DIR)))
        for page_number in range(args['start_page'], num_pages):

            print('===Scanning page {} of {} ==='.format(
                page_number + 1, num_pages))

            results_scans, results_stats, results_errors = scan_page(
                list_id, rotate_dir, args, page_number, ref_page,
                ref_bounding_boxes, list_dir, results_scans, results_stats,
                results_errors, previous_scans, backup_writer)

    # output results
    output_results_csv(args['list_id'], list_dir, results_scans)
    # NOTE: error-PDF generation is currently disabled:
    # generate_error_pages(results_errors['errors_for_human'], results_errors['skipped_pages'], args['list_id'])

    # show list of skipped pages (each entry is a {page_number: page} dict)
    print('Skipped {} pages:'.format(len(results_errors['skipped_pages'])))
    for page in results_errors['skipped_pages']:
        print(page.keys())

    # run test suite if set
    if args["test_file"]:
        test.run_test_suite(args['test_file'], results_scans)

    else:
        # print statistics
        show_statistics(results_stats, args)
Exemple #13
0
def scan_barcode(barcode, page, ref_bounding_boxes, list_dir, response_codes,
                 args, results_scans, results_stats, results_errors,
                 previous_scans, backup_writer,
                 voter_ids) -> Tuple[list, dict, dict]:
    """Process one decoded barcode and record its result.

    Invalid barcodes and voter IDs already present in ``voter_ids`` are
    ignored.  Otherwise the voter ID is added to ``voter_ids`` and the
    scanned-barcode counter is incremented.  A voter found in
    ``previous_scans`` reuses the stored result (unless in testing mode);
    any other voter has the circled responses detected, error-checked and,
    when needed, manually reviewed.  The result is appended to
    ``results_scans`` and written to the CSV backup.

    Returns the ``(results_scans, results_stats, results_errors)`` triple.
    """
    barcode_info = extract_barcode_info(barcode, page)

    # skip if not a valid barcode
    if not barcode_info:
        return results_scans, results_stats, results_errors

    barcode_coords, voter_id = barcode_info

    # Skip barcodes that were already read on this page.
    if voter_id in voter_ids:
        return results_scans, results_stats, results_errors
    voter_ids.add(voter_id)

    # increment barcodes counter
    results_stats['num_scanned_barcodes'] += 1

    testing = args["test_file"]

    # use the existing info if already scanned, unless in testing mode
    if voter_id in previous_scans and not testing:
        print('Already scanned {}'.format(voter_id))
        results_dict = previous_scans[voter_id]

    # new barcode to scan
    else:
        if utils.__DEBUG__:
            # NOTE(review): cv2.rectangle is given the Page object itself
            # rather than an image array - confirm this works as intended.
            cv2.rectangle(page, barcode_coords.top_left.to_tuple(),
                          barcode_coords.bottom_right.to_tuple(),
                          (255, 0, 255), 3)
            page.show()

        # Get the corresponding response codes region
        response_bounding_box = get_response_for_barcode(
            barcode_coords, ref_bounding_boxes["response_codes"], page.size)

        # Figure out which ones are circled.  (The reference response-codes
        # image used to be loaded from disk here for every barcode but was
        # never used, so that load has been dropped.)
        circled_responses, has_error = get_circled_responses(
            response_bounding_box, response_codes, page, list_dir)
        has_error = has_error or error_check_responses(circled_responses)

        # if has an error at this point, add to the error tally
        if has_error:
            results_stats['num_error_barcodes'] += 1

        # Do manual review if error or if flagged, unless in testing mode
        if (has_error or args["manual_review"]) and not testing:
            verdict_right, circled_responses = manual_review(
                response_bounding_box, page, circled_responses, voter_id,
                response_codes)

            # if user verdict is false, add the voter_id to the list of incorrect scans
            if not verdict_right:
                results_stats['incorrect_scans'].append(voter_id)

        # if in testing mode, convert any None circled_responses to an empty list
        if testing and circled_responses is None:
            circled_responses = []

        # build results dict
        results_dict = build_results_dict(voter_id, circled_responses)

    # save results
    results_scans.append(results_dict)
    write_to_backup(results_dict, backup_writer)

    return results_scans, results_stats, results_errors
Exemple #14
0
def create_error_image(page: Page, barcode_coords: BoundingBox,
                       first_response_coords: BoundingBox) -> Image:
    """Return the page region given by ``get_response_including_barcode``.

    The region is computed from the barcode box, the first response box
    and the page size, then extracted from ``page`` with ``get_roi``.
    """
    region = get_response_including_barcode(barcode_coords,
                                            first_response_coords,
                                            page.size)
    return page.get_roi(region)
Exemple #15
0
def scrape_words(url):
    """Return the lower-cased, stripped text of each matching ``<b>`` element.

    Elements are selected from ``Page(url)`` with the CSS selector
    ``.podcast_table_home .pod_body b``.
    """
    bold_nodes = Page(url).css('.podcast_table_home .pod_body b')
    cleaned = []
    for node in bold_nodes:
        cleaned.append(node.text_content().lower().strip())
    return cleaned
Exemple #16
0
    def attend(self, page=None, name=None, start=None, end=None):
        """Return attendance records, either as a flat export or one page.

        ``page == 'all'``: return a list of dicts for ``name`` (optionally
        limited to ``start``..``end``), oldest first, each with ``name``,
        ``rank``, ``department``, ``date``, ``begin``/``end`` formatted
        as ``HH:MM`` (empty string when missing) and ``reason``.

        Otherwise: query by date range (when both ``start`` and ``end``
        are given) or by ``self.today``, optionally restricted to
        ``name``, and paginate.  Returns
        ``(paging, self.today, rows, summary)``.  ``summary`` is filled
        in only when ``name`` is given; it is an empty ``OrderedDict``
        otherwise.
        """

        def as_clock(raw):
            # 'HHMM...' -> 'HH:MM'; missing/empty values become ''.
            return raw[0:2] + ':' + raw[2:4] if raw else ''

        if page == 'all':
            employee = self.employee.get(name=name)
            if start:
                data_list = self.collection.find(
                    {'date': {"$gte": start, "$lte": end}, 'name': name},
                    sort=[('name', 1), ('date', 1)])
            else:
                data_list = self.collection.find({'name': name},
                                                 sort=[('date', 1)])
            attend_list = []
            for data in data_list:
                # Locals are named so they do not shadow the `end` parameter.
                attend_list.append({
                    'name': data['name'],
                    'rank': employee['rank'],
                    'department': employee['department'],
                    'date': data['date'],
                    'begin': as_clock(data['begin']),
                    'end': as_clock(data['end']),
                    'reason': data['reason'] if 'reason' in data else '',
                })
            return attend_list

        if start and end:
            query = {'date': {"$gte": start, "$lte": end}}
            ordering = [('name', 1), ('date', -1)]
            if name:
                query['name'] = name
        elif name:
            query = {'date': self.today, 'name': name}
            ordering = [('date', -1)]
        else:
            query = {'date': self.today}
            ordering = [('name', 1)]
        data_list = self.collection.find(query, sort=ordering)

        get_page = Page(page)
        paging, data_list = get_page.paginate(data_list)

        summary = OrderedDict()
        if not name:
            return paging, self.today, data_list, summary

        attend_list = []
        summary['totalDay'] = 0
        summary['totalWorkingDay'] = 0
        summary['totalWorkingHours'] = 0
        for status in WORKING['inStatus']:
            summary[status] = 0
        for status in WORKING['status']:
            summary[status] = 0

        for data in data_list:
            if data['workingHours'] is not None:
                summary['totalDay'] += 1
                del data['_id']
                if 'status' in data and data['status'][0]:
                    summary[data['status'][0]] += 1
                # Count a reason only when a counter for it was seeded
                # above, as the `summary` method does; an unseeded reason
                # would otherwise raise KeyError.
                if ('reason' in data and data['reason']
                        and data['reason'] in summary):
                    summary[data['reason']] += 1
                summary['totalWorkingHours'] += data['workingHours']
            attend_list.append(data)

        summary = self.get_summary(summary)
        return paging, self.today, attend_list, summary
Exemple #17
0
def scrape_words(url):
    """Collect the words shown in bold inside the podcast table of ``url``.

    Every element matching ``.podcast_table_home .pod_body b`` on
    ``Page(url)`` contributes its text content, lower-cased and stripped
    of surrounding whitespace.
    """
    matches = Page(url).css('.podcast_table_home .pod_body b')

    def normalise(node):
        return node.text_content().lower().strip()

    return list(map(normalise, matches))