Python contains_commasの例

プログラミング言語: Python

名前空間/パッケージ名: text.text_utils

メソッド/関数: contains_commas

hotexamples.comのコード掲載数: 2

Python contains_commas - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのtext.text_utils.contains_commasの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: celltype_analyzer.py プロジェクト: sebneu/csvprofiler

def contains_number(cell):
    #numbers and maybe special charaters
    if "/" in cell or "-" in cell:
        try:
            dateutil.parser.parse(cell)
            return "DATE"
        except ValueError:
            pass
        except TypeError:
            print cell

    #TODO Time

    if text_utils.contains_commas(cell):
        #FLOAT DOUBLE
        #we have digits and commas

        type = detectFloat(cell)
        if type:
            return 'NUMBER/'+type

        #remove any whitespaces

    if cell.isdigit():
        cell = long(cell)

        p="+"
        if cell <0:
            cell = math.fabs(cell)
            p="-"

        magnitude = 1 if cell == 0 else int(math.log10(cell)) + 1
        return "NUMBER/INT:"+str(magnitude)+p

    #no alphas
    return "NUMBER+"

コード例 #2

ファイルを表示

ファイル: celltype_analyzer.py プロジェクト: sebneu/csvprofiler

def detectCellType(cell):
    '''
    Detect the type of a cell value
    Basic idea is:
        1) 1.1 (EMPTY): test for empty or null values
        2) check if the cell contains characters or not (+ some extra symbols for unit measures)
            2.1) contains characters
                2.1.1 (EMAIL) contains @ -> prop for EMAIL
                2.1.2 (URL) contains / or . -> prop for URL
                2.1.3 contains numbers
                    2.1.3.1) (DATE) try to parse the cell as a date
                    2.1.3.1) (TOKENS) contains whitespace


    @param cell:
    @return: BASETYPE:len/SUBTYPE1:len-SUBTYPE2:len
             The BASETYPE can be EMPTY, EMAIL, URL, DATE, NUMBER, ALPHA, ALPHANUM, NUMALPHA
             A + at the end of a basetype indicates special characters
             The length of the BASETYPE is separated by : and is optionally given
             Some types have SUBTYPEs, e.g., NUMBER/INT:4+ (the plus indicates greater zero), or NUMALPHA/NUMBER/FLOAT:3.1-ALPHA+:3
    '''
    #basic cleaning
    cell = cell.strip()

    # EMPTY, simple case, empty
    if text_utils.is_none_type(cell):
        return "EMPTY"
    else:
        # check for email, url, date and text (because it could contain a number as well)
        #EMAIL?
        if text_utils.contains_ampersand(cell):
            #ampersand -> email?
            if text_utils.is_email(cell):
                return "EMAIL"
        #URL?
        if "/" in cell or ".":
            # / or . -> URL?
            if url_regex.match(cell):
                return "URL"

        #contains ALPHA?
        if text_utils.contains_alpha(cell) or text_utils.contains_unit_symbol(cell):
            #we have at least one character in the cell

            #contains numbers ? ALPHANUM, NUMALPHA
            if text_utils.contains_number(cell):
                #DATE
                try:
                    dateutil.parser.parse(cell)
                    return "DATE"
                except ValueError:
                    pass
                except TypeError:
                    pass
                    #print "TypeERROR",cell

                #NUMALPHA (e.g. 12cm or 12 cm)
                numalpha = True
                num = ''
                ending = ''
                for s in cell:
                    if s.isdigit() or text_utils.contains_commas(s):
                        if len(ending) > 0:
                            numalpha = False
                            break
                        num += s
                    else:
                        ending += s
                ending = ending.strip()
                if numalpha and len(num) > 0 and len(ending.split(" ")) == 1:
                    is_alph = ''
                    if not ending.isalpha():
                        is_alph = '+'
                    return 'NUMALPHA/'+contains_number(num)+'-ALPHA' + is_alph + \
                           ':'+str(len(ending))

                # ALPHANUM ending with numbers (e.g., $ 4.50, % 30)
                alphnum = True
                num = ''
                leading = ''
                for s in cell:
                    if s.isdigit() or text_utils.contains_commas(s):
                        num += s
                    else:
                        if len(num) > 0:
                            alphnum = False
                            break
                        leading += s
                leading = leading.strip()
                if alphnum and len(num) > 0 and len(leading.split(" ")) == 1:
                    is_alph = ''
                    if not leading.isalpha():
                        is_alph = '+'
                    return 'ALPHANUM/ALPHA' + is_alph + ':'+str(len(leading))+\
                           '-'+contains_number(num)

                #TOKENS
                if " " in cell:
                    for token in cell.split(" "):
                        if not token.isalnum():

                            return "ALPHANUM+/TOKENS:" + str(len(cell.split(" ")))
                    return "ALPHANUM/TOKENS:" + str(len(cell.split(" ")))

                #ok alpha +num and not a date
                if cell.isalnum():
                    return "ALPHANUM:" + str(len(cell))
                else:
                    #alpha + numbers + something
                    return "ALPHANUM+:" + str(len(cell))

            elif " " in cell:
                #no numbers
                for token in cell.split(" "):
                    if not token.isalpha():
                        return "ALPHA+/TOKENS:" + str(len(cell.split(" ")))
                return "ALPHA/TOKENS:" + str(len(cell.split(" ")))

            elif cell.isalpha():
                #we have only alphas
                return "ALPHA:" + str(len(cell))
            else:
                # ok we have alpha and special characters
                return "ALPHA+:" + str(len(cell))

        elif text_utils.contains_number(cell):
            return contains_number(cell)

    return "UNKNOWN"