def get_checklist_for_renewal(Renewal_Notice):
    """Return the phrasings to look for when checking a renewal notice period.

    ``Renewal_Notice`` is floor-divided by 30 and the quotient (wrapped in
    ``np.int64``) is rendered both as digits and through ``n2w.convert``.
    Each rendering is paired with the suffixes " months" and " month".
    # presumably Renewal_Notice is a number of days -- confirm with callers

    Returns:
        A list of four strings, in the order: digits + " months",
        words + " months", words + " month", digits + " month".
    """
    month_count = np.int64(Renewal_Notice // 30)
    numeral, spelled = str(month_count), n2w.convert(month_count)
    variants = (
        (numeral, " months"),
        (spelled, " months"),
        (spelled, " month"),
        (numeral, " month"),
    )
    return [head + suffix for head, suffix in variants]
Exemple #2
0
def get_hint(num):
    """Fetch a hint about ``num`` from its English Wikipedia "(number)" page.

    The page ``https://en.wikipedia.org/wiki/<num>_(number)`` is downloaded,
    the ``div`` elements inside ``#mw-content-text`` are handed to
    ``get_hint_text`` up to 100 times, and the first candidate longer than
    40 characters is kept.  Every spelling of the number (digits, plus the
    ``n2w`` wording in lower/title/upper case, with spaces, dashes or no
    separator) is then masked with ``'????'`` so the hint does not give the
    answer away.

    Args:
        num: The number to look up; anything accepted by ``str()`` and
            ``n2w.convert``.

    Returns:
        The masked hint text, or ``'No hint for you'`` when the page has no
        content container or the resulting hint is blank.
    """
    url = 'https://en.wikipedia.org/wiki/' + str(num) + '_(number)'

    html = requests.get(url)

    bs = BeautifulSoup(html.text, 'html.parser')

    content = bs.find('div', attrs={'id': 'mw-content-text'})
    if content is None:
        # find() returns None when the container is absent; without this
        # guard the .findAll call below would raise AttributeError.
        return 'No hint for you'
    divs = content.findAll('div')

    hint = ''

    for _ in range(100):
        hint = get_hint_text(divs)
        if len(hint) > 40:
            text_num = n2w.convert(num)
            spellings = [text_num, text_num.title(), text_num.upper()]
            dashed = [x.replace(' ', '-') for x in spellings]
            squeezed = [x.replace(' ', '') for x in spellings]
            # str(num): str.replace only accepts strings.  Passing an int
            # here used to raise TypeError, which the old bare ``except``
            # turned into an empty hint every time.
            masks = [str(num)] + spellings + dashed + squeezed
            for mask in masks:
                # Replacing '' would insert the mask between every
                # character, so empty spellings are skipped.
                if mask:
                    hint = hint.replace(mask, '????')
            break

    if not hint.strip():
        hint = 'No hint for you'

    return hint
def preprocessDataset(dataset, foreign_characters, url, numbers, lemmatize):
    """Run the enabled clean-up passes over every line of ``dataset``.

    The passes are applied in a fixed order; each one is skipped when its
    flag is falsy:

    1. ``foreign_characters`` -- runs of non-ASCII characters become a
       single space, then empty tokens are dropped.
    2. ``numbers`` -- tokens for which ``isNumber`` does not return a value
       equal to ``False`` are passed through ``convert``.
    3. ``url`` -- tokens that ``urlparse`` gives a network location are
       replaced by ``'REDACTEDURL'``.
    4. ``lemmatize`` -- each lower-cased line is run through a
       ``LemmaTokenizer`` and re-joined with spaces.

    Returns:
        The processed lines; when no flag is set, ``dataset`` itself.
    """
    lines = dataset

    if foreign_characters:
        ascii_lines = []
        for text in lines:
            ascii_only = re.sub(r'[^\x00-\x7F]+', ' ', text)
            kept = [tok for tok in ascii_only.rstrip().split(' ') if tok != '']
            ascii_lines.append(" ".join(kept))
        lines = ascii_lines

    if numbers:
        worded_lines = []
        for text in lines:
            tokens = text.rstrip().split(' ')
            worded_lines.append(" ".join(
                [tok if isNumber(tok) == False else convert(tok)
                 for tok in tokens]))
        lines = worded_lines

    if url:
        redacted_lines = []
        for text in lines:
            tokens = text.rstrip().split(' ')
            # Index 1 of the urlparse result is the network location.
            redacted_lines.append(" ".join(
                ['REDACTEDURL' if urlparse(tok)[1] else tok
                 for tok in tokens]))
        lines = redacted_lines

    if lemmatize:
        tokenizer = LemmaTokenizer()
        lines = [" ".join(tokenizer(text.lower())) for text in lines]

    return lines
Exemple #4
0
def arqmath_post_read_html5_worker(args):
    """Turn one post's HTML5 into a flat list of ``Text`` / math entries.

    Args:
        args: A ``(post_id, html5_tokens)`` pair.

    Each ``<span class="math-container">`` is swapped in the tree for a
    plain ``<span>`` holding a placeholder of the form
    ``math_element_<index in words>___`` (followed by the original tail
    text, if any).  The tree text is then tokenized with
    ``simple_preprocess``: a token equal to a placeholder becomes the stored
    ``(element id or None, Math(...))`` tuple, any other token becomes
    ``Text(token)``.

    Returns:
        ``(post_id, document)``.
    """
    post_id, html5_tokens = args
    tree = html5_unicode_to_tree(html5_tokens)

    placeholders = {}
    containers = tree.xpath('//span[@class="math-container"]')
    for index, container in enumerate(containers):
        spelled_index = re.sub(r'\s+', '_', n2w.convert(index))
        placeholder = 'math_element_{}___'.format(spelled_index)

        # Build the stand-in span; the tail is copied into its text before
        # the original element is replaced in the tree.
        stand_in = etree.Element("span")
        stand_in.text = placeholder
        if container.tail:
            stand_in.text += ' ' + container.tail
        container.getparent().replace(container, stand_in)

        element_id = (container.attrib['id']
                      if 'id' in container.attrib else None)
        # Strip a matching run of leading/trailing dollar signs.
        formula = re.sub(r'^(\$*)(.*)\1', r'\2', container.text or '')
        placeholders[placeholder] = (element_id, Math(formula))

    document = []
    for token in simple_preprocess(' '.join(tree.itertext())):
        if token in placeholders:
            document.append(placeholders[token])
        else:
            document.append(Text(token))
    return (post_id, document)
Exemple #5
0
def ntcir_article_read_html5_worker(args):
    """Read one article from a zip archive as paragraphs of tokens.

    Args:
        args: A ``(zip_filename, filename, only_latex)`` triple.  When
            ``only_latex`` is truthy the member is parsed directly with an
            lxml ``HTMLParser(huge_tree=True)``; otherwise it is decoded as
            UTF-8 and passed through ``html5_to_xhtml``, ``mathmlcan`` and
            ``unicode_to_tree``.

    Every ``<math>`` element is replaced in the tree by a ``<span>`` holding
    a ``math_element_<index in words>___`` placeholder (plus the original
    tail text, if any).  Each ``div`` whose class contains ``ltx_para`` is
    then tokenized with ``simple_preprocess``; placeholder tokens map to the
    stored ``Math`` objects and all other tokens to ``Text(token)``.

    Returns:
        ``(zip_filename, filename, document)`` where ``document`` is a list
        of token lists, or ``[]`` if an ``etree.Error`` or
        ``UnicodeDecodeError`` was raised.
    """
    zip_filename, filename, only_latex = args
    try:
        with ZipFile(zip_filename, 'r') as archive, \
                archive.open(filename, 'r') as member:
            if only_latex:
                parser = etree.HTMLParser(huge_tree=True)
                tree = etree.parse(member, parser)
            else:
                raw_html = member.read().decode('utf-8')
                tree = unicode_to_tree(mathmlcan(html5_to_xhtml(raw_html)))

        placeholders = {}
        for index, math_node in enumerate(tree.xpath('//math')):
            spelled_index = re.sub(r'\s+', '_', n2w.convert(index))
            placeholder = 'math_element_{}___'.format(spelled_index)

            stand_in = etree.Element("span")
            stand_in.text = placeholder
            if math_node.tail:
                stand_in.text += ' ' + math_node.tail
            math_node.getparent().replace(math_node, stand_in)

            # Serialised only after the node has been swapped out of the
            # tree, as in the original statement order.
            placeholders[placeholder] = Math(tree_to_unicode(math_node))

        document = []
        paragraphs = tree.xpath('//div[contains(@class, "ltx_para")]')
        for paragraph in paragraphs:
            tokens = simple_preprocess(' '.join(paragraph.itertext()))
            document.append([
                placeholders[token] if token in placeholders else Text(token)
                for token in tokens
            ])
    except (etree.Error, UnicodeDecodeError):
        document = []
    return (zip_filename, filename, document)
Exemple #6
0
    def test_challenge4(self):
        """Print ``fibonacci(i)`` and its ``n2w`` wording for i = 3..19."""
        for index in range(3, 20):
            value = fibonacci(index)
            print(str(value), " - ", (n2w.convert(value)))
Exemple #7
0
    def __name(file_name: str) -> str:
        """Derive an identifier-style name from an asset file name.

        The name is the part of ``file_name`` before the first ``.`` and
        before the first ``@``, with spaces removed.  If it starts with
        digits, that leading number is spelled out with ``n2w.convert``,
        placed in front of the remainder, and the result is run through
        ``stringcase.snakecase`` and then ``stringcase.camelcase``.

        Args:
            file_name: File name such as ``"icon@2x.png"``.

        Returns:
            The derived name; names without a leading number (including an
            empty name, e.g. for ``".gitignore"``) are returned unchanged.
        """
        name = file_name.split('.')[0].split('@')[0].replace(' ', '')
        # Anchored match: an empty name or a name with no leading \d run
        # simply skips the conversion instead of raising IndexError.
        leading_digits = re.match(r'\d+', name)
        if leading_digits:
            digits = leading_digits.group()
            #  convert number in words
            number_name = n2w.convert(int(digits))
            #  remove exactly the matched digits; slicing by len(str(int))
            #  left leading zeros behind (e.g. "007x" -> "07x")
            name = name[len(digits):]
            #  join number words to name without number in begin
            name = number_name + ' ' + name
            # convert to snake case first, then to camel case (converting
            # directly to camel case does not work)
            name = stringcase.snakecase(name)
            name = stringcase.camelcase(name)

        return name
Exemple #8
0
def process_line(line):
    r"""Normalise one line of text.

    Steps:
        1. Characters not in ``[A-Za-z0-9\s]`` are removed.
        2. Tokens made only of digits are converted to English words with
           ``convert`` (e.g. 1000 -> one thousand); if the conversion
           fails the lower-cased token is kept as is.
        3. Runs of whitespace are squeezed to a single space and leading /
           trailing whitespace is dropped.
        4. Every token is lower-cased.

    Args:
        line: The raw text line.

    Returns:
        The normalised line terminated by a single ``"\n"``.
    """
    # The range must be A-Z: the former "A-z" also spanned the ASCII
    # characters between 'Z' and 'a' ([ \ ] ^ _ `), so they were kept.
    # Splitting will remove repeated white spaces
    words = re.sub(r"[^a-zA-Z0-9\s]", "", line).split()
    for index, word in enumerate(words):
        words[index] = word.lower()
        if word.isdigit():
            try:
                words[index] = convert(int(word))
            except Exception:
                # Best effort: keep the digits when conversion fails.
                pass
    return " ".join(words) + "\n"
import datetime
import n2w
# Demo: print the parts of the current local time, then build spoken-style
# time and date strings from them.
nowTime = datetime.datetime.now()
print(nowTime)
print(nowTime.hour)
print(nowTime.minute)
print(nowTime.day)
print(nowTime.month)
print(nowTime.strftime("%B"))
print(nowTime.year)

print(nowTime.strftime('%A %d %B %Y'))
print(nowTime.strftime('%A %d %B'))

# Spell out the current year instead of a hard-coded 2017.
print(n2w.convert(nowTime.year))

# 12-hour clock: hours 12-23 are P M (noon used to be labelled A M), and
# hours 0 and 12 are both shown as 12 (midnight used to be shown as 0).
ampm = " P M " if nowTime.hour >= 12 else " A M "
hour = nowTime.hour % 12 or 12

timeInWords = str(hour) + " " + str(nowTime.minute) + ampm
print(timeInWords)
dateInwords = nowTime.strftime('%A %d %B') + " " + n2w.convert(nowTime.year)
print(dateInwords)
import n2w
from colorama import init, Fore, Back

# Initialise colorama before any colour codes are printed.
init()

# Coloured prompt on its own line, then read the reply and print whatever
# n2w.convert returns for it.
prompt_colours = Back.YELLOW + Fore.RED
print(prompt_colours + "Enter your number:")
entered_number = input()
print(n2w.convert(entered_number))
Exemple #11
0
#q17 python

import n2w

# Running total of letters over the spelled-out numbers 0..999.
letter_counter = 0
for i in range(1000):
    # Spaces are not letters, so drop them before measuring the length.
    name = n2w.convert(i).replace(" ", "")
    letter_counter += len(name)

    # Three extra letters for every number that is not a multiple of 100
    # (presumably the word "and" -- confirm).
    if i % 100 != 0:
        letter_counter += 3

# NOTE(review): if this is meant to count 1..1000 with "and" only after
# the hundreds, the range (starts at 0, stops at 999) and the condition
# above (also true below 100) need revisiting -- TODO confirm intent.
# print() call form so the script also runs on Python 3.
print(letter_counter)
Exemple #12
0
def translate(num):
    """Return the Portuguese wording of ``num``.

    The number is first turned into words with ``n2w.convert``
    (e.g. 1 -> 'one'); that text is then passed to
    ``translator.translate`` with ``dest='pt'`` and the ``.text`` of the
    result is returned.
    """
    english_words = n2w.convert(num)
    translated = translator.translate(english_words, dest='pt')
    return translated.text
Exemple #13
0
import n2w as n
from colorama import init, Fore, Back

# Initialise colorama before any colour codes are printed.
init()

# Coloured prompt on its own line, then read the reply and print whatever
# n.convert returns for it.
prompt_colours = Back.BLUE + Fore.YELLOW
print(prompt_colours + 'Enter any number: ')
entered_number = input()
print(n.convert(entered_number))