def get_plurals(img, cols):
    """Return the text of the "pairplurals" region as one compact string.

    Calls ``utils._get_lines(img, cols, 5, 6, "pairplurals", psm_mode=6,
    apply_morphology=True)`` and concatenates the ``text`` attribute of
    every returned line, in order, with all newline and space characters
    deleted.

    NOTE(review): the positional ``5, 6`` presumably select a column range
    delimited by ``cols`` -- confirm against ``utils._get_lines``.

    Args:
        img: Image passed through unchanged to ``utils._get_lines``.
        cols: Column description passed through unchanged to
            ``utils._get_lines``.

    Returns:
        The concatenated text without newlines or spaces; an empty string
        when no lines are returned.
    """
    ocr_lines = utils._get_lines(
        img, cols, 5, 6, "pairplurals", psm_mode=6, apply_morphology=True
    )
    # One translation table deletes both newlines and spaces per line.
    strip_table = str.maketrans("", "", "\n ")
    return "".join(entry.text.translate(strip_table) for entry in ocr_lines)
def get_reference(img, cols):
    """Return the text of the "pairrefs" region, upper-cased and compact.

    Calls ``utils._get_lines(img, cols, 0, 1, "pairrefs", psm_mode=6,
    apply_morphology=True)``. Each returned line's ``text`` is upper-cased,
    stripped of all space and newline characters, and the pieces are
    concatenated in order.

    NOTE(review): the positional ``0, 1`` presumably select a column range
    delimited by ``cols`` -- confirm against ``utils._get_lines``.

    Args:
        img: Image passed through unchanged to ``utils._get_lines``.
        cols: Column description passed through unchanged to
            ``utils._get_lines``.

    Returns:
        The upper-cased, concatenated text without spaces or newlines; an
        empty string when no lines are returned.
    """
    ocr_lines = utils._get_lines(
        img, cols, 0, 1, "pairrefs", psm_mode=6, apply_morphology=True
    )
    strip_table = str.maketrans("", "", " \n")
    pieces = []
    for entry in ocr_lines:
        # Upper-case first, then drop spaces/newlines (same order as before).
        pieces.append(entry.text.upper().translate(strip_table))
    return "".join(pieces)
def get_default_operator(img, cols):
    """Return the text of the "pairdefop" region as one compact string.

    Calls ``utils._get_lines(img, cols, 4, 5, "pairdefop", psm_mode=6,
    apply_morphology=True)`` and joins the ``text`` of every returned line,
    in order, after removing all newline and space characters.

    NOTE(review): the positional ``4, 5`` presumably select a column range
    delimited by ``cols`` -- confirm against ``utils._get_lines``.

    Args:
        img: Image passed through unchanged to ``utils._get_lines``.
        cols: Column description passed through unchanged to
            ``utils._get_lines``.

    Returns:
        The concatenated text without newlines or spaces; an empty string
        when no lines are returned.
    """
    ocr_lines = utils._get_lines(
        img, cols, 4, 5, "pairdefop", psm_mode=6, apply_morphology=True
    )
    strip_table = str.maketrans("", "", "\n ")
    cleaned = (entry.text.translate(strip_table) for entry in ocr_lines)
    return "".join(cleaned)
def get_hits(img, cols):
    """Return the text of the hits region as one compact string.

    Calls ``utils._get_lines(img, cols, 1, 2, "digits", psm_mode=6,
    apply_morphology=True)`` and joins the ``text`` of every returned line,
    in order, after removing all newline and space characters. The result
    is returned as a string; no numeric conversion is performed here.

    NOTE(review): the positional ``1, 2`` presumably select a column range
    delimited by ``cols`` -- confirm against ``utils._get_lines``.

    Args:
        img: Image passed through unchanged to ``utils._get_lines``.
        cols: Column description passed through unchanged to
            ``utils._get_lines``.

    Returns:
        The concatenated text without newlines or spaces; an empty string
        when no lines are returned.
    """
    ocr_lines = utils._get_lines(
        img, cols, 1, 2, "digits", psm_mode=6, apply_morphology=True
    )
    # Turning newlines into spaces and then deleting spaces is the same as
    # deleting both characters outright.
    strip_table = str.maketrans("", "", "\n ")
    return "".join(entry.text.translate(strip_table) for entry in ocr_lines)
def get_query(img, cols):
    """Return the text of the query region with newlines turned into spaces.

    Calls ``utils._get_lines(img, cols, 2, 3, psm_mode=3)``. Each returned
    line's ``text`` has its newline characters replaced by single spaces
    (existing spaces are kept), the pieces are concatenated in order with
    no extra separator, and every occurrence of "cc|s" in the result is
    rewritten to "ccls".

    NOTE(review): the positional ``2, 3`` presumably select a column range
    delimited by ``cols`` -- confirm against ``utils._get_lines``.

    Args:
        img: Image passed through unchanged to ``utils._get_lines``.
        cols: Column description passed through unchanged to
            ``utils._get_lines``.

    Returns:
        The concatenated query text; an empty string when no lines are
        returned.
    """
    ocr_lines = utils._get_lines(img, cols, 2, 3, psm_mode=3)
    joined = "".join(entry.text.replace("\n", " ") for entry in ocr_lines)
    # Correct a common reading error: "l" misread as "|" in "ccls".
    return joined.replace("cc|s", "ccls")
def get_date_time(img, cols):
    """Extract the date and time strings from the "pairdate" region.

    Calls ``utils._get_lines(img, cols, -2, -1, "pairdate")``, concatenates
    the ``text`` of every returned line with all newline and space
    characters removed, and then pulls two substrings out of the result:

    * the date: the first ``YYYY/MM/DD``-shaped run of digits and slashes;
    * the time: the five-character window around the first ":" (two
      characters before it, the colon, two characters after it). The
      window may be shorter if the text ends right after the colon.

    NOTE(review): the positional ``-2, -1`` presumably select a column
    range delimited by ``cols`` -- confirm against ``utils._get_lines``.

    Args:
        img: Image passed through unchanged to ``utils._get_lines``.
        cols: Column description passed through unchanged to
            ``utils._get_lines``.

    Returns:
        A ``(date, time)`` tuple of strings. Either element is an empty
        string when the corresponding pattern is not found.
    """
    ocr_lines = utils._get_lines(img, cols, -2, -1, "pairdate")
    strip_table = str.maketrans("", "", "\n ")
    text = "".join(entry.text.translate(strip_table) for entry in ocr_lines)

    # Extract the date string. ``search`` rather than ``match`` so that stray
    # characters in front of the date do not hide it; whenever the date is
    # at the very start the result is the same as with ``match``.
    date_match = re.search(r'\d{4}/\d{2}/\d{2}', text)
    date = date_match.group(0) if date_match is not None else ''

    # Extract the time string. At least two characters must precede the
    # colon; otherwise the slice start would go negative and wrap around to
    # the end of the text, yielding unrelated characters.
    colon_pos = text.find(":")
    time = text[colon_pos - 2:colon_pos + 3] if colon_pos >= 2 else ''

    return date, time
def get_databases(img, cols):
    """Return the text of the "pairdbs" region as one compact string.

    Calls ``utils._get_lines(img, cols, 3, 4, "pairdbs")`` and joins the
    ``text`` of every returned line, in order, after removing all newline
    and space characters.

    NOTE(review): the positional ``3, 4`` presumably select a column range
    delimited by ``cols`` -- confirm against ``utils._get_lines``.

    Args:
        img: Image passed through unchanged to ``utils._get_lines``.
        cols: Column description passed through unchanged to
            ``utils._get_lines``.

    Returns:
        The concatenated text without newlines or spaces; an empty string
        when no lines are returned.
    """
    ocr_lines = utils._get_lines(img, cols, 3, 4, "pairdbs")
    strip_table = str.maketrans("", "", "\n ")
    fragments = [entry.text.translate(strip_table) for entry in ocr_lines]
    return "".join(fragments)