Python fromRoman 예제들, roman.fromRoman Python 예제들

예제 #1

0

파일 보기

파일: 3_functional.py 프로젝트: yanglyuxun/info_in_ML

def digit2word(key):
    """Spell out a numeric token (Arabic or Roman) as lower-case words.

    The token is first stripped of ' U.S.', commas and full stops. A token
    made only of zeros (or an empty token) yields 'zero' and '-0' yields
    'minus zero'. A trailing "'s" or "s" is removed, the remainder is
    converted recursively, and "'s" is appended to the result.

    Otherwise the token is tried as a Roman numeral (with 'IIII' treated as
    'IV'), first as a whole and then using only its first whitespace
    separated word, before being handed to ``p.number_to_words``.

    Returns the spelled-out text, or the (possibly partly converted) token
    unchanged when it cannot be spelled out.
    """
    key = key.replace(' U.S.', '').replace(',', '').replace('.', '').strip()
    if key.replace('0', '') == '':
        return 'zero'
    if key == '-0':
        return 'minus zero'
    # Best-effort conversions: a failure just means "not that kind of token".
    # `except Exception` (rather than a bare except) keeps that behaviour
    # without swallowing KeyboardInterrupt / SystemExit.
    try:
        if key.endswith("'s"):
            return digit2word(key[:-2]) + "'s"
        if key.endswith("s"):
            return digit2word(key[:-1]) + "'s"
        key = str(roman.fromRoman(key.replace('IIII', 'IV')))
    except Exception:
        pass
    try:
        key = str(roman.fromRoman(key.split()[0].replace('IIII', 'IV')))
    except Exception:
        pass
    try:
        text = p.number_to_words(key, decimal='point', andword='', zero='o')
        # NOTE(review): every '.' was removed from key at the top of the
        # function, so the two decimal-point patterns below cannot match as
        # the code stands; they are kept unchanged.
        if re.match(r'^0\.', key):
            text = 'zero ' + text[2:]
        if re.match(r'.*\.0$', key):
            text = text[:-2] + ' zero'
        text = text.replace('-', ' ').replace(',', '')
        return text.lower()
    except Exception:
        return key

예제 #2

0

파일 보기

파일: test_roman.py 프로젝트: avni-preeti/mytestrepo

def testFromRomanCase():
    """fromRoman must accept upper-case numerals and reject lower-case ones."""
    for numeral in map(roman.toRoman, range(1, 5000)):
        accepted = numeral.upper()
        rejected = numeral.lower()
        # Implicit check: the upper-case form must parse without raising.
        roman.fromRoman(accepted)
        with pytest.raises(roman.InvalidRomanNumeralError):
            roman.fromRoman(rejected)

예제 #3

0

파일 보기

파일: executables.py 프로젝트: marcus-crane/tools

def roman2dec() -> int:
	"""
	Entry point for `se roman2dec`

	Converts every numeral piped on stdin (when stdin is not a terminal),
	followed by every NUMERAL argument, to a decimal number and prints it.
	Stops at the first invalid numeral, reports it and returns the
	invalid-input exit code; returns 0 otherwise.
	"""

	import roman

	parser = argparse.ArgumentParser(description="Convert a Roman numeral to a decimal number.")
	parser.add_argument("-n", "--no-newline", dest="newline", action="store_false", help="don’t end output with a newline")
	parser.add_argument("numbers", metavar="NUMERAL", nargs="+", help="a Roman numeral")
	args = parser.parse_args()

	numerals = []

	# Piped input comes first, command-line numerals after it.
	if not sys.stdin.isatty():
		numerals.extend(piped.rstrip("\n") for piped in sys.stdin)

	numerals.extend(args.numbers)

	terminator = "\n" if args.newline else ""

	for numeral in numerals:
		try:
			value = roman.fromRoman(numeral.upper())
		except roman.InvalidRomanNumeralError:
			se.print_error("Not a Roman numeral: {}".format(numeral))
			return se.InvalidInputException.code

		print(value, end=terminator)

	return 0

예제 #4

0

파일 보기

파일: romantest.py 프로젝트: lonesloane/Python-Snippets

 def testFromRomanCase(self):
     """fromRoman must accept upper-case numerals and reject lower-case ones"""
     for value in range(1, 4000):
         numeral = roman.toRoman(value)
         # The upper-case form has to parse without raising.
         roman.fromRoman(numeral.upper())
         with self.assertRaises(roman.InvalidRomanNumeralError):
             roman.fromRoman(numeral.lower())

예제 #5

0

파일 보기

def infer_production_century(br, de):
    """
    Estimate the century in which an author was active.

    br and de are the birth and death values, either years or
    Roman-numeral centuries.

    With years: the century of birth, or the following one when the
    birth falls on or after the 80th year of its century.
    With centuries only: the shared century, or birth century + 0.5
    when death falls in the next one (e.g. XVI / XVII => 16.5).

    Returns False when either value is '0' (missing) or when the
    values are neither integers nor valid Roman numerals.
    """
    if '0' in (str(br), str(de)):
        return False

    try:
        birth_year, death_year = int(br), int(de)
    except ValueError:
        # Not plain years: interpret both values as Roman-numeral centuries.
        try:
            birth_century = roman.fromRoman(br)
            death_century = roman.fromRoman(de)
        except roman.InvalidRomanNumeralError:
            return False
        if birth_century == death_century:
            return birth_century
        assert death_century == birth_century + 1
        return birth_century + 0.5

    hundreds = floor(birth_year / 100)
    if birth_year % 100 < 80:
        return hundreds + 1

    # Born late in the century: attribute the author to the next one.
    production_century = hundreds + 2
    assert floor(death_year / 100) + 1 == production_century
    return production_century

예제 #6

0

파일 보기

파일: romantest.py 프로젝트: Elanqueen/DiveIntoPy

 def testFromRomanCase(self):
     """fromRoman must accept upper-case numerals and reject lower-case ones"""
     for numeral in (roman.toRoman(value) for value in range(1, 5000)):
         upper_form = numeral.upper()
         lower_form = numeral.lower()
         # Parsing the upper-case form must not raise.
         roman.fromRoman(upper_form)
         with self.assertRaises(roman.InvalidRomanNumeralError):
             roman.fromRoman(lower_form)

예제 #7

0

파일 보기

파일: roman_numerals_parser.py 프로젝트: jakubgros/FlatFinder

 def is_roman_number(roman_as_str):
     """ Tell whether roman_as_str parses as a valid Roman numeral (True) or not (False) """
     try:
         fromRoman(roman_as_str)
         return True
     except InvalidRomanNumeralError:
         return False

예제 #8

0

파일 보기

    def roman_magic(self, v1, op, v2):
        """Apply the operator text `op` to two Roman numerals and return the
        result as a Roman numeral.

        Both operands are converted to decimal, joined with `op` into an
        expression string, evaluated, and converted back with toRoman.
        """
        left_operand = str(fromRoman(v1))
        right_operand = str(fromRoman(v2))

        # NOTE(review): eval() executes `op` as Python source. If `op` can
        # come from untrusted input this allows arbitrary code execution;
        # consider a fixed operator table instead.
        expression = left_operand + op + right_operand
        result = eval(expression)

        return toRoman(result)

예제 #9

0

파일 보기

	def testFromRomanCase(self):
		"""fromRoman must accept upper-case numerals and reject lower-case ones"""
		for value in range(1, 5000):
			numeral = roman.toRoman(value)
			# Upper-case explicitly so this test assumes nothing about
			# toRoman(); the call also checks that parsing does not raise.
			roman.fromRoman(numeral.upper())
			with self.assertRaises(roman.InvalidRomanNumeralError):
				roman.fromRoman(numeral.lower())

예제 #10

0

파일 보기

파일: utils.py 프로젝트: althingi-net/lagasafn-xml

def is_roman(goo):
    """Return True when `goo` parses as a Roman numeral, False otherwise."""
    try:
        roman.fromRoman(goo)
    except roman.InvalidRomanNumeralError:
        return False
    return True

예제 #11

0

파일 보기

 def isRoman(txt):
     # Helper: reports whether txt is a valid Roman numeral.
     # Relies on the pip module roman: https://pypi.python.org/pypi/roman
     try:
         roman.fromRoman(txt)
     except roman.InvalidRomanNumeralError:
         is_valid = False
     else:
         is_valid = True
     return is_valid

예제 #12

0

파일 보기

def replace_chapter_number(match):
    """Build the 'Chapitre N.' replacement text for a chapter-heading match.

    Group 1 of the match is converted from a Roman numeral. When it is not
    a valid numeral but starts with 'L', the conversion is retried without
    that first letter; otherwise the text is used unchanged.
    """
    numeral = match.group(1)
    try:
        chapter = str(roman.fromRoman(numeral))
    except roman.InvalidRomanNumeralError:
        if numeral.startswith('L'):
            # Retry without the leading 'L'; a failure here propagates.
            chapter = str(roman.fromRoman(numeral[1:]))
        else:
            chapter = numeral
    return f'Chapitre {chapter}.'

예제 #13

0

파일 보기

파일: Kinds.py 프로젝트: ra2003/xindex

 def start_end(self):
     """Return the (start, end) pair parsed from self.name as integers.

     Returns None when self.name does not match PAGE_ORDER_RX. Both values
     are read as decimal when the start is all digits, otherwise both are
     read as Roman numerals.
     """
     match = PAGE_ORDER_RX.match(self.name)
     if match is None:
         return None
     first = match.group("start")
     last = match.group("end")
     to_number = int if first.isdigit() else roman.fromRoman
     return to_number(first), to_number(last)

예제 #14

0

파일 보기

파일: string_fixes.py 프로젝트: David-Lor/Python_VigoBusAPI

def is_roman(text: str) -> bool:
    """Tell whether the given string is a Roman number.

    The text is stripped and upper-cased, and every character outside A-Z
    is dropped before it is parsed. Returns True if it parses, False if not.
    """
    letters_only = re.sub(r'[^A-Z]', "", text.strip().upper())
    try:
        fromRoman(letters_only)
        return True
    except NoRoman:
        return False

예제 #15

0

파일 보기

def getMinimalFormOfNumeral(numeral):
    """Re-encode a loosely written Roman numeral via toRoman.

    Each symbol is subtracted when it is smaller than the symbol that
    follows it and added otherwise; the final symbol is always added. The
    resulting total is converted back with roman.toRoman.
    """
    total = 0

    for symbol, following in zip(numeral, numeral[1:]):
        symbol_value = roman.fromRoman(symbol)
        if symbol_value < roman.fromRoman(following):
            total -= symbol_value
        else:
            total += symbol_value

    total += roman.fromRoman(numeral[-1])
    return roman.toRoman(total)

예제 #16

0

파일 보기

파일: sequ.py 프로젝트: kfrye/sequ

def convertToNum(input_type, s):
    """Convert the string `s` to a number according to `input_type`.

    "R" parses `s` as a Roman numeral as given, "r" upper-cases it first,
    "a"/"A" pass the lower-cased string to fromCharString, and "i"/"f"
    parse it as a float. Any other type yields 0.
    """
    if input_type == "R":
        return roman.fromRoman(s)
    if input_type == "r":
        return roman.fromRoman(s.upper())
    if input_type in ("a", "A"):
        return fromCharString(s.lower())
    if input_type in ("i", "f"):
        return float(s)
    return 0

예제 #17

0

파일 보기

 def replace_roman_numerals_in_match(m):
     """Return replacement text for match `m` with its Roman numeral in decimal.

     Group 3 holds the numeral (case-insensitive). The result is group 1,
     the decimal value, then ':' plus group 8 when group 8 is non-empty,
     or group 7 otherwise. The matched text is returned unchanged when the
     numeral is empty or anything goes wrong during the conversion.
     """
     numeral = m.group(3).upper()
     if not numeral:
         return m.group(0)
     # Best effort: any failure (invalid numeral, missing group) leaves the
     # text untouched. `except Exception` instead of a bare except so that
     # KeyboardInterrupt / SystemExit are not swallowed.
     try:
         if m.group(8):
             return u"{}{}:{}".format(m.group(1), roman.fromRoman(numeral), m.group(8))
         return u"{}{}{}".format(m.group(1), roman.fromRoman(numeral), m.group(7))
     except Exception:
         return m.group(0)

예제 #18

0

파일 보기

파일: master_function_list.py 프로젝트: tmcarlisle/Personal-Identity-in-the-Treatise

def romanDigitRangeCorrector(cite_in):
    """Convert a Roman-numeral citation, or a dash-separated range of two,
    to decimal text.

    A single numeral yields its decimal string. When the citation contains
    a hyphen, en dash or em dash, the first two dash-separated parts are
    converted and joined with '-'. Numerals are upper-cased before parsing.
    """
    dash_expression = re.compile('[-–—]')
    if not dash_expression.search(cite_in):
        return str(roman.fromRoman(cite_in.upper()))

    pieces = re.split(dash_expression, cite_in)
    range_start = str(roman.fromRoman(pieces[0].upper()))
    range_end = str(roman.fromRoman(pieces[1].upper()))
    return range_start + '-' + range_end

예제 #19

0

파일 보기

파일: FileSanitizer.py 프로젝트: daro-suardiaz/camera-duplicates

    def normalize_string(self, input_string):
        """
        Normalise a camera model string so Canon "Mark" variants compare equal.

        1) break the input string into whitespace-separated tokens
        2) check whether a token is a variation of "Mark" (i.e. mark, mk, MarkIII, 5dmark, etc)
        3) if the mark model number is a Roman numeral, convert it to an integer
        4) join the tokens, lower-case the result and remove all spaces

        Returns the normalised string.
        """
        normalized_tokens = []
        # Raw string: "\s" and "\d" in a plain literal are invalid escape
        # sequences and trigger a warning on current Python versions.
        lookup_pattern = re.compile(r"(m(?:ar)?k\s*)([ivx\d]*)",
                                    flags=re.IGNORECASE)
        roman_nums = roman.romanNumeralPattern

        for token in input_string.split():

            # check if the token is a variation of mark
            lookup = lookup_pattern.search(token)
            if lookup:
                # some strings have the 5d concatenated to the model, so separate it
                if re.search('5d', token, re.IGNORECASE):
                    token = '5D Mark'
                else:
                    token = 'Mark'
                model_nbr = lookup.group(2)

                # A Roman model number glued to the token is converted and
                # appended to the token.
                # NOTE(review): a model number written in digits (e.g. "mk2")
                # is not appended here, so it is dropped from the token;
                # confirm whether that is intended.
                if model_nbr != '' and not model_nbr.isdigit():
                    model_nbr = roman.fromRoman(model_nbr.upper())
                    token = token + ' ' + str(model_nbr)

            # a token that is itself a Roman numeral is converted to an integer
            is_roman = roman_nums.search(token)
            if is_roman:
                token = str(roman.fromRoman(token))

            normalized_tokens.append(token)

        # join the normalised tokens, then lower-case and remove all spaces
        output_string = ' '.join(normalized_tokens)
        output_string = output_string.strip()
        output_string = output_string.lower()
        output_string = output_string.replace(" ", "")
        return output_string

예제 #20

0

파일 보기

파일: find.py 프로젝트: pdxjohnny/julius_caesar

def onLine(l):
    """Process one line of the play text and update the module-level parser state.

    Depending on the line this starts a new act, starts a new scene, starts
    a new character's speech, or records a spoken line in PLAY_LINES. All
    state lives in the module globals declared below.
    """
    global PLAY
    global PLAY_LINES
    global line
    global inAct
    global inScene
    global inCharacter
    global currCharacter
    global firstBlock
    l = l.rstrip()
    # Act heading: the act number is the Roman numeral in the second word.
    if isSection(l, START_ACT):
        inAct = roman.fromRoman(l.split()[1])
        if DEBUG:
            print('Act', inAct)
        createIfEmpty(PLAY, inAct)
        # A new act resets the scene and the current speaker.
        inScene = False
        inCharacter = False
        return
    # Scene heading: the second word minus its last character is the numeral
    # (presumably the heading ends in punctuation - TODO confirm).
    if isSection(l, START_SCENE):
        inScene = roman.fromRoman(l.split()[1][:-1])
        if DEBUG:
            print('Scene', inScene)
        createIfEmpty(PLAY[inAct], inScene)
        PLAY[inAct][inScene] = []
        inCharacter = False
        # The line counter restarts with every scene.
        line = 0
        return
    # Character heading: besides matching START_CHARACTER, the character just
    # past the prefix must not be a space and every word must start upper-case.
    if isSection(l, START_CHARACTER) and \
            l[len(START_CHARACTER) + 1] != ' ' and \
            len([word for word in l.split() if word[0].isupper()]) == len(l.split()):
        flushCharacter()
        inCharacter = l.strip().upper()
        currCharacter = ''
        firstBlock = True
        if DEBUG:
            print('Character', inCharacter)
        return
    # Speech text: only recorded while inside an act, scene and character and
    # while the speaker's first block of text is still open.
    if inAct and inScene and inCharacter and firstBlock:
        # A blank line closes the first block once some text was collected.
        if len(l.strip()) == 0 and firstBlock:
            if len(currCharacter) > 0:
                firstBlock = False
            return
        line += 1
        # Key: act + scene + speaker + upper-cased text; value: line number.
        PLAY_LINES[''.join(
            [str(inAct),
             str(inScene), inCharacter,
             l.strip().upper()])] = line
        currCharacter += l + '\n'

예제 #21

0

파일 보기

파일: FormulaNumber.py 프로젝트: wolverineq/asrt

    def _ordinal2word(wordsList, indice):
        """Spell out the French ordinal found at wordsList[indice].

           i.e. 1er --> premier

           param wordsList: list of words
           param indice: position of the ordinal number in wordsList
           return the 'written' ordinal number, or the normalized
           word unchanged when it is neither cardinal nor Roman
        """
        strNumber = NumberFormula._normalizeNumber(wordsList[indice])
        if strNumber.encode('utf-8') == u"1ère".encode('utf-8'):
            return u"première"

        # Drop the ordinal suffix letters to isolate the number itself
        bareNumber = re.sub(u"[erèm]", "", strNumber)
        if NumberFormula._isCardinalNumber(bareNumber):
            written = num2words(int(bareNumber), ordinal=True, lang='fr')
        elif NumberFormula._isRomanNumber(bareNumber):
            # Roman to cardinal, then digits to ordinal
            cardinalNumber = fromRoman(bareNumber.encode('utf-8'))
            written = num2words(cardinalNumber, ordinal=True, lang='fr')
        else:
            written = strNumber

        # Remove the plural 's' left in front of the ordinal ending
        for plural, singular in ((r'vingtsi', 'vingti'),
                                 (r'centsi', 'centi'),
                                 (r'millionsi', 'millioni'),
                                 (r'milliardsi', 'milliardi')):
            written = re.sub(plural, singular, written)

        return written

예제 #22

0

파일 보기

파일: annotation_utils.py 프로젝트: HarteminkLab/COMPETE

def get_orf_data(chromo, start, end, orf_annotation = None):
    """Return the gene features of a GFF file that overlap a region.

    chromo is the chromosome number, start/end bound the region, and
    orf_annotation is the path of the GFF file; the process exits when it
    is not given. The returned frame has the columns chromo, start, end,
    strand and name, with chromo converted from 'chrI' style to an
    integer and name passed through find_name.
    """
    if orf_annotation is None:
        sys.exit("For ORF annotation, a GFF file needs to be provided. Such as this one: https://github.com/jianlingzhong/COMPETE_examples/blob/master/saccharomyces_cerevisiae.20080621.gff")

    annotation = pd.read_csv(orf_annotation, sep='\t', comment='#', header=None)

    # keep only the gene features of the gff
    is_gene = annotation.iloc[:, 2] == 'gene'
    genes = annotation.loc[is_gene]

    # drop Mito and 2-micron chromosome features
    seqid = genes.iloc[:, 0]
    genes = genes.loc[~((seqid == 'chrMito') | (seqid == '2-micron'))]

    # keep and name only the columns that are needed
    genes = genes.iloc[:, [0, 3, 4, 6, 8]]
    genes = genes.rename(columns={0: 'chromo', 3: 'start', 4: 'end', 6: 'strand', 8: 'name'})

    # convert 'chrI' to 1
    genes['chromo'] = genes['chromo'].apply(lambda label: roman.fromRoman(label[3:]))

    genes = genes.loc[genes['chromo'] == chromo]
    outside_region = (genes['end'] < start) | (genes['start'] > end)
    genes = genes.loc[~outside_region]
    genes['name'] = genes['name'].apply(find_name)

    return genes

예제 #23

0

파일 보기

파일: stenograms.py 프로젝트: ManoSeimas/manoseimas.lt

    def _parse_stenogram_meta(self, response, meta_xs):
        """Collect stenogram metadata from the selector `meta_xs`.

        Always sets 'source' and '_id' from the response URL. Adds 'date',
        'sitting_time', 'sitting_no', 'session_no', 'session_season' and
        'session' for whichever of the module-level patterns match.
        Returns the metadata dict.
        """
        source = self._get_source(response.url, 'p_id')
        meta = {'source': source, '_id': source['id']}

        date_match = meta_xs.re(date_re)
        if date_match:
            year = int(date_match[0])
            month = month_names_map[date_match[1]]
            day = int(date_match[2])
            meta['date'] = date(year, month, day)

        sitting_match = meta_xs.re(sitting_re)
        if sitting_match:
            meta['sitting_time'] = sitting_match[0].title()
            meta['sitting_no'] = sitting_match[1]
        else:
            # Fall back to the pattern that only yields the sitting number.
            sitting_no_match = meta_xs.re(sitting_no_re)
            if sitting_no_match:
                meta['sitting_no'] = sitting_no_match[0]

        session_match = meta_xs.re(session_re)
        if not session_match:
            return meta

        meta['session_no'] = roman.fromRoman(session_match[0])
        meta['session_season'] = session_match[1].title()
        if 'date' in meta:
            meta['session'] = '{} {}'.format(meta['date'].year,
                                             meta['session_season'])
        return meta

예제 #24

0

파일 보기

 def sort_collections(t):
     """Sort key for a collection tuple, based on its first element.

     Titles containing ": Chartae Latinae" get their last word, a Roman
     numeral, replaced by a zero-padded four-digit number; any other title
     is used as it is.
     """
     title = t[0]
     if ": Chartae Latinae" not in title:
         return title
     words = title.split()
     volume = roman.fromRoman(words[-1])
     return ' '.join(words[:-1] + ['{:04}'.format(volume)])

예제 #25

0

파일 보기

def nameclean(orig):
    """Reduce a game title to a compact upper-case key for comparison.

    Steps, in order: drop non-ASCII characters, upper-case and strip,
    expand 'GOTY', remove a list of filler words and phrases, map
    'one'/'two'/'40k'/'+' to fixed replacements, turn punctuation into
    spaces, convert stand-alone Roman numerals to decimal, strip a
    trailing 's' from every word, drop repeated words (keeping the first
    occurrence) and finally remove all whitespace.
    """
    name = orig.encode('ascii', errors='ignore').decode('ascii')
    name = name.upper().strip()
    name = name.replace('GOTY', 'GAME OF THE YEAR')
    # Raw strings for the two regex escapes: '\(' in a plain literal is an
    # invalid escape sequence and triggers a warning on current Python.
    ignoredphrases = [
        r'\(', r'\)', 'the', 'with', 'Early Access', 'bundle', 'and', '&', 'in',
        'vr', 'beta', 'Double Pack', 'Pack', 'Free to Play', 'Edition',
    ]
    for phrase in ignoredphrases:
        name = re.sub(r'\b{0}\b'.format(phrase), '', name, flags=re.IGNORECASE)
    name = re.sub(r'\bone\b', '1', name, flags=re.IGNORECASE)
    name = re.sub(r'\btwo\b', '2', name, flags=re.IGNORECASE)
    name = re.sub(r'\b40k\b', '40,000', name, flags=re.IGNORECASE)
    name = re.sub(r'\+', 'PLUS', name, flags=re.IGNORECASE)
    name = re.sub(r'[^\w\s]', ' ', name, flags=re.IGNORECASE)

    def roman_to_decimal(found):
        # Keep the surrounding whitespace (groups 1 and 6) and swap only
        # the numeral (group 2) for its decimal value.
        if not found.group(0).strip():
            return ''
        return (found.group(1)
                + str(roman.fromRoman(found.group(2).strip()))
                + found.group(6))

    name = re.sub(
        r'(\A|\s)(\b(?=[MDCLXVI]+\b)M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\b)(\s|\Z)',
        roman_to_decimal,
        name,
        flags=re.IGNORECASE)
    name = re.sub(r'(\w*)s\b', r'\1', name, flags=re.IGNORECASE)
    words = name.split()
    # De-duplicate while keeping the order of first appearance.
    name = ' '.join(sorted(set(words), key=words.index))
    name = re.sub(r'\s+', '', name, flags=re.IGNORECASE).strip()
    return name

예제 #26

0

파일 보기

파일: numerals.py 프로젝트: paolomonella/romualdus

def setValues(siglum):
    ''' Give every <num> element lacking a @value attribute the arabic
        value of its Roman numeral content.
        The 'siglum' is the XML filename to parse ('g' for 'g.xml'; 'bonetti' for 'bonetti.xml' etc.).
        Successfully converted elements get @type 'guessedvalue'; elements
        whose content cannot be parsed get @type 'foo' and are reported.
        The modified tree is written to another XML file
        ('numerals-g.xml'; 'numerals-bonetti.xml' etc.).
        '''
    n = myconst.ns
    tree = etree.parse('../xml/%s.xml' % siglum)
    for number in tree.findall('.//t:num', n):
        if number.get('value') is not None:
            # @value was already set: leave the element alone
            continue
        content = getRomanContent(number, checkallnumbers=False)
        if content == '':
            print('foo', number.text)
        numeral = content.upper()
        try:
            myvalue = roman.fromRoman(numeral)
        except roman.InvalidRomanNumeralError:
            print('Numero romano non parsabile:', numeral)
            number.set('type', 'foo')
        else:
            number.set('value', str(myvalue))
            number.set('type', 'guessedvalue')

    tree.write('../xml/numerals-%s.xml' % (siglum),
               encoding='UTF-8',
               method='xml',
               pretty_print=True,
               xml_declaration=True)

예제 #27

0

파일 보기

def token_features(y):
    """Classify a token by how it looks and return a one-character code.

    "0": a table/figure keyword ("Table", "Figure", "Fig." and their
         upper-case forms)
    "1": a dotted decimal number such as "1", "1.2" or "1.2.3", or a
         token whose part before the first '.' is a Roman numeral
    "2": any other token starting with an upper-case letter
    "3": everything else, including an empty token
    """
    x = y.strip()
    parts = x.split('.', 2)
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    pattern = re.compile(r'[1-9][0-9]*(\.)?([1-9][0-9]*(\.([1-9][0-9]*)?)?)?')
    m = pattern.match(x)
    if x in ("Table", "TABLE", "Figure", "FIGURE", "Fig.", "FIG."):
        return "0"
    if m and m.span() == (0, len(x)):
        return "1"
    try:
        roman.fromRoman(parts[0].upper())
        return "1"
    except Exception:
        # Not a Roman numeral. x[:1] instead of x[0] so that an empty
        # token is classified as "3" rather than raising IndexError.
        if x[:1].isupper():
            return "2"
        return "3"

예제 #28

0

파일 보기

def sgd_gff2dataframe(input_annot_gff_filename, ok_features_list, include_dubious=False):
    """
    Load an SGD annotation gff file into a dataframe.

    Keeps only features whose type is in ok_features_list, drops
    mitochondrial and 2-micron entries and, unless include_dubious is
    set, entries whose Note starts with 'Dubious'. Chromosome names are
    rewritten from Roman to zero-padded decimal ('chrIV' -> 'chr04'),
    the 'SGD:' prefix is removed from dbxref and start is cast to int.
    """
    excluded_chroms = ['chrMito', '2-micron']

    gff_df = gff2dataframe(input_annot_gff_filename)
    gff_df['Name'] = gff_df['Name'].str.strip()

    # keep only the requested feature types (e.g. ['CDS'])
    gff_df = gff_df[gff_df['feature'].isin(ok_features_list)]

    # filter dubious entries and non-standard chromosomes
    if include_dubious:
        keep = ~gff_df['chrom'].isin(excluded_chroms)
    else:
        not_dubious = gff_df['Note'].apply(lambda note: not str(note).startswith('Dubious'))
        keep = not_dubious & ~gff_df['chrom'].isin(excluded_chroms)
    gff_df = gff_df[keep]

    def to_numbered_chrom(chrom):
        # 'chrIV' -> 'chr04'
        numeral = str(chrom).replace('chr', '')
        return 'chr' + str(fromRoman(numeral)).zfill(2)

    gff_df['chrom'] = gff_df['chrom'].apply(to_numbered_chrom)

    # remove "SGD:" from the id column
    gff_df['dbxref'] = gff_df['dbxref'].apply(lambda ref: str(ref).replace('SGD:', ''))
    gff_df['start'] = gff_df['start'].apply(int)

    return gff_df

예제 #29

0

파일 보기

def arabicOrRomanToInt(s):
    """Return the integer value of `s`, which is either a run of decimal
    digits or a Roman numeral."""
    if re.match(r'\d+$', s):
        return int(s)
    return roman.fromRoman(s)

예제 #30

0

파일 보기

파일: replace_by_rule.py 프로젝트: fenildf/text_normalization

def replace_ordinal(token):
    """10. Ordinal numbers.

    Strips a trailing "'s" / "s" (re-attached to the result), removes
    ordinal suffixes and a few punctuation marks, then spells out what is
    left: digits become an ordinal, a Roman numeral becomes "the " plus
    its ordinal, and anything else is returned as it is.
    """
    res = token.strip()
    ends = ""
    if res.endswith("'s"):
        ends, res = "'s", res[:-2]
    if res.endswith("s"):
        ends, res = "s", res[:-1]

    suffixes = (
        "th", "TH", "Th", 'st', 'ST', 'nd', 'ND', 'rd', 'RD', ".", "º", ",",
        "ª"
    )
    for suffix in suffixes:
        res = res.replace(suffix, "").strip()

    if res.isdigit():
        return numstr2word(float(res), ordinal=True) + ends

    # It may be a Roman numeral
    try:
        res = "the " + numstr2word(roman.fromRoman(res), ordinal=True)
    except roman.InvalidRomanNumeralError:
        pass
    return res + ends

예제 #31

0

파일 보기

파일: Yeast.py 프로젝트: airanmehr/bio

def load():
    """Read the Burke yeast SNP frequency table and reshape it into count data.

    The table is read from a hard-coded path. The 'maf'/'cov' columns are
    turned into a frame `cd` with (REP, GEN, READ) column levels holding
    counts ('C') and coverage ('D'), the ancestor columns are added for
    every replicate, and chromosome names are converted from Roman
    numerals when possible.

    NOTE(review): the function ends with a bare `return`, so it returns
    None and the assembled `cd` frame is discarded - confirm whether
    `return cd` was intended.
    """
    # Index by (CHROM, POS); iloc[:, 2:] drops the first two remaining columns.
    a=pd.read_csv('/home/arya/storage/Data/Yeast/BurkeYeast/freq_tables/snps.txt',sep='\t').rename(columns={'chr':'CHROM','pos':'POS'}).set_index(['CHROM','POS']).iloc[:,2:]
    c=pd.Series(a.columns)
    # Columns whose name starts with 'F'.
    founders=a[c[c.apply(lambda x:  x[0]=='F')]]
    # NOTE(review): the result of this sum is not assigned to anything.
    founders.loc[:,map(lambda x:'cov' in x,founders.columns)].sum(1)
    def toint(x):
        # Convert to int when possible, otherwise keep the value unchanged.
        try:return int(x)
        except: return x

    # Columns whose name starts with 'maf' or 'cov'.
    cd=a[c[c.apply(lambda x: 'maf' ==x[:3]) |c.apply(lambda x: 'cov' ==x[:3])]]
    # Split each column name on '_' into a tuple, renaming cov -> D and maf -> C.
    cd.columns=map(lambda x:tuple(map(lambda y: toint(y.replace('cov','D').replace('maf','C')) ,x.split('_'))), cd.columns)
    cd.columns=pd.MultiIndex.from_tuples(cd.columns)
    # C becomes round(C * D) as an integer; D is kept as it is.
    cd=pd.concat([(cd['C']* cd['D']).round().astype(int),cd['D']],1,keys=['C','D']).reorder_levels([1,2,0],axis=1)
    cd.columns.names=['REP','GEN','READ']

    # Ancestor columns get the same C / D treatment, under key 0.
    ancestors=a[c[c.apply(lambda x: 'anc' in  x)]].rename(columns={'anc_maf':'C','anc_cov':'D'})
    ancestors['C']=(ancestors['C']*ancestors['D']).round().astype(int)
    ancestors=pd.concat([ancestors],1,keys=[0])
    R=cd.columns.levels[0]
    # Repeat the ancestor block once per value of the first column level.
    ancestors=pd.concat([ancestors for _ in R],1,keys=R)
    ancestors.columns.names=['REP','GEN','READ']
    cd=pd.concat([cd,ancestors],1).sort_index(1).sort_index()
    # Replace GEN values using the week -> generation mapping below.
    weekToGen={0:0, 6:180, 12:360, 18:540}
    cd=cd.T.reset_index()
    cd.GEN=cd.GEN.replace(weekToGen)
    cd=cd.set_index(['REP','GEN','READ']).T.sort_index(1).sort_index()
    # Best effort: convert the part of CHROM after its first three characters
    # from a Roman numeral. Any failure, including a missing `roman` module,
    # is silently ignored by the bare except.
    try:
        import roman
        cd=cd.reset_index()
        cd.CHROM=cd.CHROM.apply(lambda x:roman.fromRoman(x[3:]))
        cd=cd.set_index(['CHROM','POS'])
    except:
        pass
    return

예제 #32

0

파일 보기

파일: replace_by_rule.py 프로젝트: fenildf/text_normalization

def replace_cardinal(token):
    """5. Ordinary (cardinal) numbers.

    Removes 'U.S.', spaces, quotes, commas, full stops, colons and inner
    hyphens, notes a leading minus sign and a trailing "'s", drops a
    trailing 'A' or 'M', then spells out digits or a Roman numeral.
    Anything else is returned as the cleaned token. An input that is
    empty after the first clean-up yields "".
    """
    # 'U.S.' must be removed before the bare '.' is.
    for unwanted in ('U.S.', " ", '"', ',', '.', ':'):
        token = token.replace(unwanted, "")
    if len(token) == 0:
        return ""

    minus = token.startswith("-")
    if minus:
        token = token[1:]
    token = token.replace('-', "")

    ends = ""
    if token.endswith("'s"):
        ends = "'s"
        token = token[:-2]
    elif token.endswith(("A", "M")):
        token = token[:-1].strip()

    if token.isdigit():
        res = numstr2word(token)
    else:
        # It may be a Roman numeral
        try:
            res = numstr2word(roman.fromRoman(token))
        except roman.InvalidRomanNumeralError:
            res = token

    prefix = "minus " if minus else ""
    return prefix + res + ends

예제 #33

0

파일 보기

파일: characterEditor.py 프로젝트: Sectoid/Pyfa

def arabicOrRomanToInt(s):
    """Convert `s` to an int: decimal digits are parsed directly, anything
    else is parsed as a Roman numeral."""
    is_arabic = re.match(r'\d+$', s) is not None
    return int(s) if is_arabic else roman.fromRoman(s)

예제 #34

0

파일 보기

파일: Parser.py 프로젝트: ra2003/xindex

 def numeric(x):
     """Numeric value of `x`.

     Text starting with a digit is read as an int and shifted by
     Util.INTEGER_OFFSET; anything else is read as a Roman numeral, with 0
     returned when it is not a valid one.
     """
     if not x[0].isdigit():
         try:
             return roman.fromRoman(x)
         except roman.RomanError:
             return 0
     return int(x) + Util.INTEGER_OFFSET

예제 #35

0

파일 보기

파일: romantest.py 프로젝트: gawaineo/unit_testing_practice

    def testSanity(self):
        """Round trip: fromRoman(toRoman(n)) gives back n for every n"""

        for expected in range(1, 5000):
            round_tripped = roman.fromRoman(roman.toRoman(expected))
            self.assertEqual(expected, round_tripped)

예제 #36

0

파일 보기

파일: FormulaNumber.py 프로젝트: idiap/asrt

    def _ordinal2word(wordsList, indice):
        """Spell out the English ordinal found at wordsList[indice].

           i.e. 1st --> first

           param wordsList: list of words
           param indice: position of the ordinal number in wordsList
           return the 'written' ordinal number, or the normalized
           word unchanged when it is neither digits nor Roman
        """
        strNumber = NumberFormula._normalizeNumber(wordsList[indice])
        if strNumber.encode('utf-8') == u"1st".encode('utf-8'):
            return u"first"

        # Drop the ordinal suffix letters to isolate the number itself
        bareNumber = re.sub(u"[ndstrh]", "", strNumber)
        if bareNumber.isdigit():
            return num2words(int(bareNumber), ordinal=True)

        if NumberFormula._isRomanNumber(bareNumber):
            # Roman to cardinal, then digits to ordinal
            cardinalNumber = fromRoman(bareNumber.encode('utf-8'))
            return num2words(cardinalNumber, ordinal=True)

        print("newnumberis not digit!!!")
        return strNumber

예제 #37

0

파일 보기

파일: parse_constitution_html.py 프로젝트: marijnkoolen/constitution-reference-parser

def get_unit(title):
    pattern = patterns.makeTitlePatterns()
    title = title.lower()
    unit = title.replace(" ", "_")
    unit = re.sub("[ \[\]\(\)]", "_", title)
    number = "1"
    for unitForm, unitPattern in pattern.iteritems():
        m = unitPattern.match(title)
        if m:
            if unitForm == "title_preamble":
                unit = m.group(1)
                number = "1"
            elif unitForm == "title_number":
                unit = "unspecified"
                number = m.group(1)
            elif unitForm == "title_unit_roman":
                unit = m.group(1)
                try:
                    number = str(roman.fromRoman(m.group(2).upper()))
                except roman.InvalidRomanNumeralError:
                    print "Invalid Roman title number:", title, "unit:", unit, "number:", number
                    number = m.group(2)
            else:
                unit = m.group(1)
                number = m.group(2)
                if unit == "art":
                    unit = "article"
    return unit, number

예제 #38

0

파일 보기

파일: integrated.py 프로젝트: priyankpalod/NLP-Affliation

def tb_heading_features(y):
    """Classify a heading token by how it looks and return a one-character code.

    "0": a table/figure keyword ("Table", "Figure", "Fig." and their
         upper-case forms)
    "1": a dotted decimal number such as "1", "1.2" or "1.2.3", or a
         token whose part before the first '.' is a Roman numeral
    "2": any other token starting with an upper-case letter
    "3": everything else, including an empty token
    """
    x = y.strip()
    parts = x.split('.', 2)
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    pattern = re.compile(r'[1-9][0-9]*(\.)?([1-9][0-9]*(\.([1-9][0-9]*)?)?)?')
    m = pattern.match(x)
    if x in ("Table", "TABLE", "Figure", "FIGURE", "Fig.", "FIG."):
        return "0"
    if m and m.span() == (0, len(x)):
        return "1"
    try:
        roman.fromRoman(parts[0].upper())
        return "1"
    except Exception:
        # Not a Roman numeral. x[:1] instead of x[0] so that an empty
        # token is classified as "3" rather than raising IndexError.
        if x[:1].isupper():
            return "2"
        return "3"

예제 #39

0

파일 보기

    def maybe_normalize(self,
                        value,
                        mapping='',
                        roman_normalization=True,
                        mapping_append=True):
        """Apply replacement rules, and optionally Roman-numeral conversion, to `value`.

        Each rule is a sequence whose first item is a plain string or a
        compiled pattern and whose second item is the replacement. With no
        `mapping` only self.default_mapping is used; otherwise `mapping` is
        applied before the defaults when `mapping_append` is true and after
        them when it is false.

        When `roman_normalization` is set, every numeral reported by
        self.get_roman_numbers is replaced by its decimal value; invalid
        numerals are printed and skipped. The result goes through
        self.clean_single_line and has double spaces collapsed once.
        """
        if mapping == '':
            rules = self.default_mapping
        elif mapping_append:
            rules = mapping + self.default_mapping
        else:
            rules = self.default_mapping + mapping

        for rule in rules:
            needle = rule[0]
            if type(needle) is str:
                value = value.replace(needle, rule[1])
            elif isinstance(needle, Pattern):
                value = needle.sub(rule[1], value)
            else:
                print('UNEXPECTED', type(needle), needle)

        if roman_normalization:
            for ro_before, ro_after, ro in self.get_roman_numbers(value):
                try:
                    decimal_text = str(roman.fromRoman(ro))
                except roman.InvalidRomanNumeralError as ex:
                    print(ex)
                    continue
                value = value.replace(ro_before + ro + ro_after,
                                      ro_before + decimal_text + ro_after)

        cleaned = self.clean_single_line(value)

        return cleaned.replace('  ', " ")

예제 #40

0

파일 보기

파일: integrated.py 프로젝트: priyankpalod/NLP-Affliation

def token_features(y):
    """Classify a token by how it looks and return a one-character code.

    "5": the "$$$" marker
    "0": a table/figure keyword
    "1": a section-number-like token: a number from 1 to 20, optionally
         followed by '.' and a second number up to 20 (with an optional
         trailing '.'), a Roman numeral up to 20, or a single upper-case
         letter written as "A", "A.", "(A)" or "A)"
    "2": any other token starting with an upper-case letter
    "3": a token whose first dot-separated part is neither purely
         alphabetic nor purely digits (this includes the empty token)
    "4": everything else
    """
    x = y.strip()
    parts = x.split('.')
    if x == "$$$":
        return "5"
    if x in ("Table", "TABLE", "Figure", "FIGURE", "Fig."):
        return "0"

    p_len = len(parts)
    if p_len == 1 and x.isdigit() and 1 <= int(x) <= 20:
        return "1"

    if p_len in (2, 3) and parts[0].isdigit() and 1 <= int(parts[0]) <= 20:
        second_is_small_number = parts[1].isdigit() and int(parts[1]) <= 20
        # "3." or "3.2"
        if p_len == 2 and (parts[1] == '' or second_is_small_number):
            return "1"
        # "3.2."
        if p_len == 3 and second_is_small_number and parts[2] == '':
            return "1"

    if p_len == 1 or (p_len == 2 and parts[1] == ''):
        head = parts[0]
        # Best effort: a failure only means the token is not a Roman
        # numeral. `except Exception` keeps that without swallowing
        # KeyboardInterrupt / SystemExit.
        try:
            if roman.fromRoman(head.upper()) <= 20:
                return "1"
        except Exception:
            pass
        bare_letter = len(head) == 1 and 'A' <= head <= 'Z'
        parenthesised_letter = (len(head) == 3 and head[0] == '(' and head[2] == ')'
                                and head[1].isalpha() and head[1].isupper())
        letter_then_paren = (len(head) == 2 and head[1] == ')'
                             and head[0].isalpha() and head[0].isupper())
        if bare_letter or parenthesised_letter or letter_then_paren:
            return "1"

    # x[:1] instead of x[0] so that an empty token does not raise IndexError.
    if x[:1].isupper():
        return "2"
    if not (parts[0].isalpha() or parts[0].isdigit()):
        return "3"
    return "4"

예제 #41

0

파일 보기

 def get_spenser(self):
     '''
     Read data/spenser.txt and return a dictionary of sonnets.
     Keys are sonnet numbers, taken from lines that consist of a Roman
     numeral once punctuation is removed.
     Each value is the list of that sonnet's lines, every line being a
     list of lower-cased words.
     '''
     filename = 'data/spenser.txt'
     num = 0
     sonnets = {}
     with open(filename) as f:
         for raw_line in f:
             line = re.sub('[:;.()?!]', '', raw_line.strip()).lower()
             try:
                 num = int(roman.fromRoman(line.upper()))
             except (ValueError, roman.InvalidRomanNumeralError):
                 # Not a heading: treat it as a line of the current sonnet.
                 pass
             else:
                 sonnets[num] = []
                 continue
             words = line.split()
             if len(words) > 0:
                 sonnets[num].append(words)
     return sonnets

예제 #42

0

파일 보기

파일: Taiger_Royal_Names.py 프로젝트: Cawinchan/Taiger_Interview

def getSortedList_easy(lst):

    """
    Sort royal names with the pre-made ``roman`` module, for speed
    comparison / to check that a hand-written conversion is correct.

    Each entry is split at its last run of whitespace into a name and a Roman
    ordinal, the ordinal is converted to an integer, the (name, number) pairs
    are sorted, and the numbers are converted back to Roman numerals.  The
    intermediate lists are printed.

    :param lst: List of Stringed RoyalNames, e.g. ``["Louis IX", "Louis VIII"]``
    :return: List of Sorted RoyalNames
    :raises TypeError: if any element of ``lst`` is not a string
    """

    # Previously a non-string element skipped the whole body and the final
    # ``return`` failed with UnboundLocalError; report the real problem.
    if not all(isinstance(x, str) for x in lst):
        raise TypeError("getSortedList_easy expects a list of strings")

    roman_to_int_convertedlst = []
    for royalname in lst:
        # Split on the last whitespace run only, so multi-word names such as
        # "Mary Ann II" keep their full name part.
        name, ordinal = royalname.rsplit(None, 1)
        roman_to_int_convertedlst.append((name, roman.fromRoman(ordinal)))
    print("Converted from", lst, "\n"
          "to", roman_to_int_convertedlst)

    sortedlst = sorted(roman_to_int_convertedlst)
    print("Sorted", sortedlst)

    return [name + " " + str(roman.toRoman(integer))
            for name, integer in sortedlst]

예제 #43

0

파일 보기

파일: text.py 프로젝트: 2la2/Sefaria-Project

 def replace_roman_numerals_in_match(m):
     """
     Replacement callback that rewrites a Roman numeral inside a regex match.

     Group 2 of ``m`` is read as the Roman numeral (case-insensitively).  On
     success the result is ``"<group 1><decimal value>:<group 7>"``.  When
     group 2 is missing, empty, or not a valid Roman numeral, the matched text
     is returned unchanged.
     """
     s = m.group(2)
     # An empty or non-participating group used to fall through and return
     # None (or fail on None.upper()); keep the original text instead.
     if not s:
         return m.group(0)
     try:
         return "%s%s:%s" % (m.group(1), roman.fromRoman(s.upper()), m.group(7))
     except roman.InvalidRomanNumeralError:
         return m.group(0)

예제 #44

0

파일 보기

파일: ps.py 프로젝트: pubstandards/pubstandards-london

def ps_event(numeral):
    """Render ``event.html`` for the event identified by a Roman numeral.

    The numeral is upper-cased before conversion.  If it is not a valid
    Roman numeral, the tuple ``("Invalid roman numeral!", 400)`` is returned
    instead of a rendered page.
    """
    try:
        event_number = roman.fromRoman(numeral.upper())
    except roman.InvalidRomanNumeralError:
        return "Invalid roman numeral!", 400
    else:
        return flask.render_template(
            'event.html',
            event=ps_data.get_ps_event_by_number(event_number),
        )

예제 #45

0

파일 보기

파일: solve.py 프로젝트: xiaotianrandom/contest_for_fun

def parse_token(token):
    """Normalise one token of an arithmetic expression.

    * A token that is a substring of ``'()+-*/'`` is returned as is (note
      that this is a substring test, so the empty string also qualifies).
    * A token made only of digits and commas is returned without commas.
    * A token made only of the letters ``IVXLCDM`` is converted from a Roman
      numeral and returned as a decimal string.
    * Anything else is logged as unknown and the replacement is read from
      standard input.
    """
    if token in '()+-*/':
        result = token
    elif token.strip('0123456789,') == '':
        result = token.replace(',', '')
    elif token.strip('IVXLCDM') == '':
        result = str(roman.fromRoman(token))
    else:
        log.warning('Unknown token: ' + token)
        result = raw_input()
    return result

예제 #46

0

파일 보기

파일: parsing.py 프로젝트: hzxusx/PlasmaPy

    def _extract_charge(arg: str):
        """
        Receive a `str` representing an element, isotope, or ion.
        Return a `tuple` containing a `str` that should represent an
        element or isotope, and either an `int` representing the
        charge or `None` if no charge information is provided.  Raise
        an `~plasmapy.utils.InvalidParticleError` if charge information
        is inputted incorrectly.
        """

        invalid_charge_errmsg = (f"Invalid charge information in the particle string '{arg}'.")

        if arg.count(' ') == 1:  # Cases like 'H 1-' and 'Fe-56 1+'
            isotope_info, charge_info = arg.split(' ')

            sign_indicator_only_on_one_end = (
                    charge_info.endswith(('-', '+')) ^
                    charge_info.startswith(('-', '+')))

            just_one_sign_indicator = (
                    (charge_info.count('-') == 1 and
                     charge_info.count('+') == 0) or
                    (charge_info.count('-') == 0 and
                     charge_info.count('+') == 1))

            if not sign_indicator_only_on_one_end and just_one_sign_indicator:
                raise InvalidParticleError(invalid_charge_errmsg) from None

            charge_str = charge_info.strip('+-')

            try:
                if roman.romanNumeralPattern.match(charge_info):
                    Z_from_arg = roman.fromRoman(charge_info) - 1
                elif '-' in charge_info:
                    Z_from_arg = - int(charge_str)
                elif '+' in charge_info:
                    Z_from_arg = int(charge_str)
                else:
                    raise InvalidParticleError(invalid_charge_errmsg) from None
            except ValueError:
                raise InvalidParticleError(invalid_charge_errmsg) from None

        elif arg.endswith(('-', '+')):  # Cases like 'H-' and 'Pb-209+++'
            char = arg[-1]
            match = re.match(f"[{char}]*", arg[::-1])
            Z_from_arg = match.span()[1]
            isotope_info = arg[0:len(arg) - match.span()[1]]

            if char == '-':
                Z_from_arg = -Z_from_arg
            if isotope_info.endswith(('-', '+')):
                raise InvalidParticleError(invalid_charge_errmsg) from None
        else:
            isotope_info = arg
            Z_from_arg = None

        return isotope_info, Z_from_arg

예제 #47

0

파일 보기

파일: FormulaNumber.py 프로젝트: d-unknown-processor/asrt

    def _roman2word(strNumber):
        """Spell out a roman number as written words.

           param strNumber: the roman number as text; it is encoded to
                            UTF-8 before being parsed
           return the value produced by NumberFormula._cardinal2word for
                  the parsed number
        """
        encodedNumeral = strNumber.encode('utf-8')
        return NumberFormula._cardinal2word(fromRoman(encodedNumeral))

예제 #48

0

파일 보기

파일: xmlParser.py 프로젝트: tpmccallum/BookwormDB

 def markup(self):
     """
     Build one record per non-blank text fragment found under ``self.dom``.

     ``descend`` returns a mapping whose keys can be turned into a dict
     (presumably tuples of (tag, value) pairs -- confirm against ``descend``)
     and whose values are the text of the fragment.  Each record is that
     dict plus:

     * ``"textString"``: the fragment text;
     * ``"actNumber"``: the integer value of the Roman numeral following
       "ACT"/"Act" in the record's ``"ACT"`` entry, when available;
     * ``"sceneNumber"``: likewise for "SCENE"/"Scene" in ``"SCENE"``.

     Fragments with no word characters are dropped.  A missing entry, a
     heading that does not match, or an invalid numeral simply leaves the
     corresponding number out of the record.
     """
     fragments = descend(self.dom,stackOfTags=[],seenSoFar=dict(),depth=1)
     # (output field, source entry, pattern capturing the numeral)
     numbered_headings = (
         ("actNumber", "ACT", r"(?:ACT|Act) +(\w+)"),
         ("sceneNumber", "SCENE", r"(?:SCENE|Scene) +([IXVL]+)"),
     )
     returnable = []
     for key in fragments:
         line = dict(key)
         line["textString"] = fragments[key]
         if not re.search(r"\w+",line["textString"]):
             continue
         for field, heading, pattern in numbered_headings:
             try:
                 line[field] = roman.fromRoman(re.findall(pattern,line[heading])[0])
             except (KeyError, IndexError, TypeError,
                     roman.InvalidRomanNumeralError):
                 # Best effort for both headings: the act lookup used to
                 # crash on a non-matching or invalid heading while the scene
                 # lookup ignored every error.
                 pass
         returnable.append(line)
     return returnable

예제 #49

0

파일 보기

파일: FormulaNumber.py 프로젝트: idiap/asrt

    def _roman2word(strNumber):
        """Spell out a roman number as written words.

           param strNumber: the roman number as text; it is encoded to
                            UTF-8 before being parsed
           return the value produced by num2words for the parsed number
        """
        return num2words(fromRoman(strNumber.encode('utf-8')))

예제 #50

0

파일 보기

파일: util.py 프로젝트: DIYBookScanner/spreads

 def _to_int(self, value):
     if isinstance(value, int):
         return value
     elif isinstance(value, basestring) and self.is_roman(value.upper()):
         return roman.fromRoman(value.upper())
     elif isinstance(value, RomanNumeral):
         return value._val
     else:
         raise ValueError("Value must be a valid roman numeral, a string"
                          " representing one or an integer: '{0}'"
                          .format(value))

예제 #51

0

파일 보기

파일: timeline_chunk.py 프로젝트: mdrohmann/sphinxplugin-project-timeline

    def parse_worked_on(self, text_or_node, submodule):
        """Collect "worked on" strings and hand them to ``_parse_worked_strings``.

        ``submodule`` is a Roman numeral; it is converted to a zero-based
        index.  ``text_or_node`` is either a plain string, used as the only
        entry, or an object with a ``traverse`` method from which list items
        are gathered.
        """
        submodule_index = roman.fromRoman(submodule) - 1

        if isinstance(text_or_node, basestring):
            worked_strings = [text_or_node]
        else:
            worked_strings = []
            # NOTE(review): the items are parsed from ``text_or_node`` once per
            # enumeration found, not from the enumeration itself -- confirm
            # this is intended and not a source of duplicates.
            for _ in text_or_node.traverse(utils.is_list_or_enumeration):
                worked_strings.extend(utils.parse_list_items(text_or_node))

        self._parse_worked_strings(worked_strings, submodule_index)

예제 #52

0

파일 보기

파일: data_types.py 프로젝트: the-it/WS_THEbotIT

 def _compute_sortkey(self):
     """Return the string key used to order this volume.

     The volume name is searched with the regex registered for its type.
     Group 1 of the match is read as a Roman numeral (0 is used when the
     name is "R").  First series, second series and supplements get keys
     prefixed "1_", "2_" and "3_" followed by the zero-padded number (and
     group 2 for the two series); every other type gets "4".
     """
     match = _REGEX_MAPPING[self.type].search(self.name)
     latin_number = roman.fromRoman(match.group(1)) if self.name != "R" else 0
     if self.type == VolumeType.FIRST_SERIES:
         return f"1_{latin_number:02d}_{match.group(2)}"
     if self.type == VolumeType.SECOND_SERIES:
         return f"2_{latin_number:02d}_{match.group(2)}"
     if self.type == VolumeType.SUPPLEMENTS:
         return f"3_{latin_number:02d}"
     return "4"

예제 #53

0

파일 보기

파일: utils.py 프로젝트: mdrohmann/sphinxplugin-project-timeline

def split_name_and_submodule(name):
    """Split ``name`` into its base name and zero-based submodule indices.

    ``submodule_split_re`` separates the base name (group 1, stripped) from
    the submodule text (group 2).  When the submodule text is empty the
    result is ``[base]``; otherwise the text is split with
    ``submodules_split_re`` and each piece is read as a Roman numeral
    (case-insensitively) and appended as ``value - 1``.
    """
    groups = submodule_split_re.match(name).groups()
    base = groups[0].strip()
    submodule_text = groups[1]
    if len(submodule_text) == 0:
        return [base]
    indices = [
        roman.fromRoman(piece.upper()) - 1
        for piece in submodules_split_re.split(submodule_text)
    ]
    return [base] + indices

예제 #54

0

파일 보기

파일: rewrite.py 프로젝트: marijnkoolen/constitution-reference-parser

	def rewrite_roman_numerals(self, text):
		"""Replace Roman numerals that follow a reference unit with decimals.

		A reference is one of ``self.refUnits``, a single space, and whatever
		``self.romanPattern`` matches; the pattern is expected to capture the
		numeral as the second group of the combined expression.  Only the
		numeral inside each such reference is rewritten; occurrences of the same
		letters elsewhere in ``text`` are left alone.  References whose numeral
		is empty or invalid are kept unchanged.  With no reference units
		configured, ``text`` is returned as is.
		"""
		if not self.refUnits:
			return text
		romanRef = re.compile(r"\b(" + "|".join(self.refUnits) + ") " + self.romanPattern + r"\b")

		def to_decimal(match):
			romanNumeral = match.group(2)
			if not romanNumeral:
				return match.group(0)
			try:
				number = str(roman.fromRoman(romanNumeral.upper()))
			except roman.InvalidRomanNumeralError:
				return match.group(0)
			# Splice the number over the numeral's own span so any other text
			# covered by the match is preserved exactly.
			matched = match.group(0)
			start, end = match.span(2)
			offset = match.start()
			return matched[:start - offset] + number + matched[end - offset:]

		# The previous implementation substituted the bare numeral across the
		# whole text, which also rewrote unrelated words such as a lone "I".
		return romanRef.sub(to_decimal, text)

예제 #55

0

파일 보기

파일: index.py 프로젝트: maieul/indexation-sources

def traiter_nombres(chaine):
	"""Convert Roman numerals inside a reference string to decimal numbers.

	Spaces are removed first.  The string is then scanned character by
	character: at every separator (a character found in the global ``seps``)
	the piece between the previous separator and this one is upper-cased and,
	if ``fromRoman`` accepts it, replaced by its decimal value.  As long as no
	separator has been met, the whole string is also tried as a single Roman
	numeral (without upper-casing).  Conversion failures are silently ignored.
	The result is finally passed through ``supprimer_sep_nombre``.

	Once a separator has been seen, the piece after the last separator is not
	converted by this loop.
	"""
	chaine = chaine.replace(' ','')
	global seps
	i = -1
	sep = 0 # start index of the current piece (just past the last separator)

	#1. convert the Roman number to an integer
	# NOTE(review): the loop iterates over the string object bound to
	# ``chaine`` at this point; rebinding ``chaine`` below does not affect the
	# iteration, so ``i`` and ``sep`` index the ORIGINAL string while the
	# slices are taken from the UPDATED one.  After a replacement that changes
	# the length the offsets no longer line up -- confirm whether inputs with
	# several separators are expected.
	for c in chaine: #walk through the string
		i = i + 1
		if c in seps:
			morceau = chaine[sep:i].upper() 		#the piece to convert
			try:
				chaine = chaine[:sep] + str(fromRoman(morceau)) + chaine[i:]
			except:
				# not a Roman numeral: leave the piece untouched
				pass
			sep = i+1
		if sep == 0:
			# no separator seen so far: try the whole string as one numeral
			try:
				chaine = str(fromRoman(chaine))
			except:
				pass 
		
	chaine = supprimer_sep_nombre(chaine)
	return chaine

예제 #56

0

파일 보기

파일: timeline_chunk.py 프로젝트: mdrohmann/sphinxplugin-project-timeline

    def parse_dependencies(self, text_or_node, submodule):
        """Record dependency strings for a submodule in ``self.dependencies``.

        ``submodule`` is a Roman numeral; it is converted to a zero-based
        index used as the dictionary key.  ``text_or_node`` is either a plain
        string, used as the only entry, or an object with a ``traverse``
        method from which list items are gathered.  New strings are appended
        to whatever is already stored for that submodule.
        """
        submodule_index = roman.fromRoman(submodule) - 1

        if isinstance(text_or_node, basestring):
            dep_strings = [text_or_node]
        else:
            dep_strings = []
            # NOTE(review): the items are parsed from ``text_or_node`` once per
            # enumeration found, not from the enumeration itself -- confirm
            # this is intended and not a source of duplicates.
            for _ in text_or_node.traverse(utils.is_list_or_enumeration):
                dep_strings.extend(utils.parse_list_items(text_or_node))

        known = self.dependencies
        if submodule_index not in known:
            known[submodule_index] = []
        known[submodule_index] += dep_strings

예제 #57

0

파일 보기

파일: SecMapper.py 프로젝트: blumonkey/SecMapper

def token_features(tok_input):
    """Classify a token into a one-character feature code.

    The token is stripped of surrounding whitespace and the first matching
    rule wins:

    * ``"6"`` -- a special section heading (abstract, acknowledgement(s),
      reference(s)) in title case or upper case;
    * ``"5"`` -- the special symbol ``$$$``;
    * ``"0"`` -- a table/figure heading word;
    * ``"1"`` -- a section-number-like token: ``N``, ``N.``, ``N.M`` or
      ``N.M.`` with N in 1..20 and M at most 20; a Roman numeral (any case,
      optionally followed by a dot) with value at most 20; or a single
      capital letter written as ``A``, ``A)`` or ``(A)``, optionally
      followed by a dot;
    * ``"2"`` -- any other token starting with an upper-case character;
    * ``"3"`` -- a token whose part before the first dot is neither purely
      alphabetic nor purely numeric (this includes the empty token);
    * ``"4"`` -- everything else.
    """
    tok = tok_input.strip()
    # An empty token has no first character to inspect; it is neither
    # alphabetic nor numeric, so it gets the same code as other such tokens
    # instead of raising IndexError.
    if not tok:
        return "3"

    # Mark special sections.  "ACKNOWLEDGEMENTs" was a typo for the
    # upper-case plural; it is kept so previously accepted input still is.
    special_sections = (
        "Abstract", "ABSTRACT",
        "Acknowledgement", "ACKNOWLEDGEMENT",
        "Acknowledgements", "ACKNOWLEDGEMENTS", "ACKNOWLEDGEMENTs",
        "Reference", "REFERENCE",
        "References", "REFERENCES",
    )
    if tok in special_sections:
        return "6"
    # Mark special symbol '$$$'
    if tok == "$$$":
        return "5"
    # Mark table/figure headings
    if tok in ("Table", "TABLE", "Figure", "FIGURE", "Fig."):
        return "0"

    def in_range(text, lowest):
        # True for a digit string whose value lies in lowest..20.
        return text.isdigit() and lowest <= int(text) <= 20

    parts = tok.split('.')
    p_len = len(parts)

    # Digit tokens
    if p_len == 1 and in_range(tok, 1):
        return "1"
    if p_len in (2, 3) and in_range(parts[0], 1):
        if p_len == 2 and (parts[1] == '' or in_range(parts[1], 0)):
            return "1"
        if p_len == 3 and in_range(parts[1], 0) and parts[2] == '':
            return "1"

    # Roman numerals and single-letter enumerators, optionally dot-terminated
    if p_len == 1 or (p_len == 2 and parts[1] == ''):
        head = parts[0]
        try:
            if roman.fromRoman(head.upper()) <= 20:
                return "1"
        except roman.InvalidRomanNumeralError:
            pass
        is_bare_letter = len(head) == 1 and 'A' <= head <= 'Z'
        is_parenthesised = (len(head) == 3 and head[0] == '(' and head[2] == ')'
                            and head[1].isalpha() and head[1].isupper())
        is_half_parenthesised = (len(head) == 2 and head[1] == ')'
                                 and head[0].isalpha() and head[0].isupper())
        if is_bare_letter or is_parenthesised or is_half_parenthesised:
            return "1"

    # Handling Capitalizations
    if tok[0].isupper():
        return "2"
    if not (parts[0].isalpha() or parts[0].isdigit()):
        return "3"
    return "4"

예제 #58

0

파일 보기

파일: bot.py 프로젝트: kumarabd/writeups

def solve(ques):
    """Evaluate the arithmetic question ``ques`` and return the answer line.

    Only the part before the first ``=`` is used and commas are dropped.
    The expression is split around the operators ``- + * / ( )``; each
    operand that starts with an upper-case letter is converted from a Roman
    numeral, each operand that starts with a lower-case letter is converted
    from English words with ``text2int``, and everything else is kept as
    written.  The rebuilt expression is evaluated and the result is returned
    followed by ``"\\r\\n"``.
    """
    # We only need lhs
    ques = ques.split("=")[0]
    # We don't need commas (this call was misspelled ``repace`` and raised
    # AttributeError on every question)
    ques = ques.replace(',', '')
    pieces = []
    # Let's separate parts between operators; the capturing group keeps the
    # operators themselves in the result
    for g in re.split(r'([-+*/()])', ques):
        g = g.strip()
        # If part a roman numeral, convert it to decimal
        if re.match('([A-Z]+)', g):
            pieces.append("%s" % roman.fromRoman(g))
        # If part is in plain english, convert it to decimal
        elif re.match('([a-z]+)', g):
            pieces.append("%s" % text2int(g))
        else:
            pieces.append(g)
    parsed = ''.join(pieces)
    # NOTE(security): ``eval`` runs whatever expression survives the
    # conversion above.  The question comes from outside the program, so
    # this should be replaced by a restricted arithmetic evaluator.
    # Now eval can do the rest
    return "%s\r\n" % (eval(parsed))

예제 #59

0

파일 보기

파일: 150826_RE_link_conversion.py 프로젝트: the-it/WS_THEbotIT

def decide_REIA_or_REWL(re_sub):
    """Choose between the ``REIA`` and ``REWL`` templates for a matched link.

    The matched text is searched again with ``FIT`` and the two template
    arguments come from ``build_arg``.  ``REIA`` is used for archive links
    and, for other links, for these volumes (group 3 is the Roman volume
    number, group 2 a prefix, group 4 a suffix, group 5 the half volume):

    * no prefix, no suffix, volume below 12 (I to XI);
    * no prefix, suffix ``A``, volume below 3, half volume ``1``
      (I A,1 and II A,1);
    * prefix ``S``, no suffix, volume below 4, no half volume
      (S I, S II, S III).

    Everything else uses ``REWL``.
    """
    found = FIT.search(re_sub.group(0))
    arg1, arg2, _unused = build_arg(found)
    if found.group(1) == 'archive':
        use_reia = True
    else:
        volume = roman.fromRoman(found.group(3))
        prefix = found.group(2)
        suffix = found.group(4)
        half_volume = found.group(5)
        use_reia = (
            (suffix is None and prefix is None and volume < 12)
            or (prefix is None and suffix == 'A' and volume < 3
                and half_volume == '1')
            or (prefix == 'S' and suffix is None and volume < 4
                and half_volume is None)
        )
    template = '{{REIA|%s|%s}}' if use_reia else '{{REWL|%s|%s}}'
    return template % (arg1, arg2)