コード例 #1
0
def see_if_iserror():
    """Print a running count of the CONEQUAL.txt formulas flagged by ``for_iserror``.

    Lines of the input file that contain ``D:\\Users`` are treated as file-name
    markers and skipped; every other (stripped) line is parsed as a formula
    and handed to ``for_iserror`` together with the parser's depth/token
    dictionary. Each time ``for_iserror`` returns a truthy value the running
    count is printed.

    Returns None. Errors raised by the parser or by ``open`` propagate.
    """
    p = ForNestedIf.ExcelParser()

    filenamestring = 'CONEQUAL.txt'
    filepath = 'D:\\Users\\v-jizha4\\ExcelExp\\expAnalyResults\\totalsplits\\' + filenamestring

    # Read everything first so the handle is closed before the parsing starts.
    with open(filepath) as readfile:
        lines = readfile.readlines()

    analyzelist = []
    for eachline in lines:
        eachline = eachline.strip()
        # Backslash is escaped explicitly; the value is the same text as before.
        if 'D:\\Users' in eachline:
            continue
        analyzelist.append(eachline)

    count = 0
    for i in analyzelist:
        p.parse('=' + i)
        dic = p.get_dic_depth_token()
        if for_iserror(i, dic):
            count += 1
            print(count)
コード例 #2
0
def return_true_ifis_useless(formula):
    """Tell whether ``formula`` holds an IF whose condition merely equates its two branches.

    Every inner IF reported by the parser is examined, followed by the whole
    formula. An IF counts as useless when its condition text is exactly
    ``<true part>=<false part>`` or ``<false part>=<true part>``.

    Returns True as soon as such an IF is found; False when none is found or
    when the formula (or any examined piece) cannot be parsed.
    """
    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse('=' + formula)
    except:
        return False

    # The outer formula is inspected last, after all of its inner IFs.
    pieces = parser.get_all_innerif_list()
    pieces.append(formula)

    for piece in pieces:
        try:
            parser.parse('=' + piece)
        except:
            return False

        parts = parser.get_threeparts_IF()
        forward = parts[1] + '=' + parts[2]
        if forward == parts[0]:
            return True
        backward = parts[2] + '=' + parts[1]
        if backward == parts[0]:
            return True

    return False
コード例 #3
0
def get_simplified_CHOOSE_pattern(formula, threepartlist):
    """Build an ``IFERROR(CHOOSE(...))`` replacement for a chain of ``x=value`` IFs.

    ``threepartlist`` holds three parallel lists: condition texts, true-branch
    texts and false-branch texts. Each condition is split on ``=``; the text
    after the first ``=`` must be parseable on its own, otherwise False is
    returned. ``formula`` itself is not used.

    The CHOOSE index is ``(x - v0)/(v1 - v0) + 1`` built from the first two
    compared values, the choices are the true-branch texts, and the fallback
    is the last false-branch text.

    Raises IndexError when a condition has no ``=`` or fewer than two
    conditions are supplied.
    """
    conditions = threepartlist[0]
    true_values = threepartlist[1]
    false_values = threepartlist[2]

    parser = ForNestedIf.ExcelParser()
    left_sides = []
    right_sides = []

    for condition in conditions:
        sides = condition.split('=')
        left, right = sides[0], sides[1]
        try:
            parser.parse('=' + right)
        except:
            return False
        right_sides.append(right)
        left_sides.append(left)

    # Comma-separated CHOOSE arguments, one per true branch.
    choices = ','.join(true_values)

    index_expr = ('(' + left_sides[0] + '-' + right_sides[0] + ')/('
                  + right_sides[1] + '-' + right_sides[0] + ')')

    return ('IFERROR(CHOOSE(' + str(index_expr) + '+1 ,' + choices + '),'
            + false_values[-1] + ')')
コード例 #4
0
def classify_equal_formulas(formula):
    """Classify an equality-based nested IF as ``'ischoose'``, ``'isand'`` or ``'isnewform'``.

    The formula is parsed and the list of true-branch values is taken from
    element 3 of ``p.get_dic_for_equal(...)``.

    * ``'ischoose'``  - every value converts to float and ``is_arithmetic``
      accepts the resulting list (the list is also printed).
    * ``'isand'``     - otherwise, when the values contain duplicates.
    * ``'isnewform'`` - otherwise.

    Previously the numeric-but-not-arithmetic case with duplicates returned
    the placeholder string ``'  '``, which no caller recognises; it now
    returns ``'isand'`` like the non-numeric case does.

    Parser errors propagate to the caller.
    """
    p = ForNestedIf.ExcelParser()
    p.parse('=' + formula)
    dic = p.get_dic_depth_token()
    returnlist = p.get_dic_for_equal(dic)
    true_value_list = returnlist[3]

    try:
        num_value_list = [float(item) for item in true_value_list]
        print(num_value_list)
        if is_arithmetic(num_value_list):
            return 'ischoose'
    except Exception:
        # Non-numeric values (or a failing arithmetic check) only mean the
        # formula is not a CHOOSE candidate; fall through to the other labels.
        pass

    if len(set(true_value_list)) < len(true_value_list):
        return 'isand'
    return 'isnewform'
def get_final_func_result(sourcefilepath, resultfilepath):
    """Convert a ``::``-separated refactoring log into a CSV of depth statistics.

    Each non-blank input line must hold exactly 15 ``::``-separated fields
    (workbook name, old formula, new formula, ten pattern flags, old depth,
    new depth); lines with any other field count are skipped. For every kept
    line the old and new formulas are re-parsed to obtain their nested-IF
    depth, and one CSV row is written:

        olddepth,newdepth,depthreduce,proportion,<ten flags>

    ``proportion`` is ``depthreduce / old_depth`` as a real fraction (0.0
    when the old depth is 0). ``resultfilepath`` is truncated first.

    Parser errors propagate; both files are closed even then.
    """
    with open(resultfilepath, 'w') as writefile, open(sourcefilepath) as readfile:
        for newline in readfile:
            newline = newline.strip()
            if not newline:
                continue

            fields = newline.split('::')
            if len(fields) != 15:
                continue
            (excelname, oldformula, newformula, tfand, tfor, tflookup,
             tfidlookup, tfchoose, tfmatch, tfifs, tfmaxmin, tfuseless,
             ifredun, olddepth, newdepth) = fields

            p = ForNestedIf.ExcelParser()
            p.parse('=' + oldformula)
            old_depth = p.get_nested_ifs()
            p.parse('=' + newformula)
            new_depth = p.get_nested_ifs()

            depthreduce = old_depth - new_depth
            # float() forces true division: with two ints Python 2 would
            # floor the ratio to 0 for every partial reduction.
            proportion = float(depthreduce) / old_depth if old_depth else 0.0

            row = [olddepth, newdepth, str(depthreduce), str(proportion),
                   tfand, tfor, tflookup, tfidlookup, tfchoose, tfmatch,
                   tfifs, tfmaxmin, tfuseless, ifredun]
            writefile.write(','.join(row) + '\n')
コード例 #6
0
def return_true_ifis_MAXMIN(formula):
    """Report whether ``formula`` contains an IF that is really a MAX or MIN.

    The whole formula and every inner IF reported by the parser are
    candidates; only candidates whose text contains ``IF`` exactly once are
    examined. A candidate matches when its condition is ``A op B`` (either
    side optionally wrapped in parentheses) where ``A`` and ``B`` are its own
    true and false parts in either order and ``op`` is one of
    ``<  >  <=  >=``.

    Each candidate is re-parsed before its three parts are read; previously
    the parts of the outer formula were compared for every candidate.

    Returns True on the first match, otherwise False (also when the formula
    cannot be parsed). Unparsable candidates are skipped.
    """
    p = ForNestedIf.ExcelParser()
    try:
        p.parse('=' + formula)
    except Exception:
        return False

    all_innerif_list = p.get_all_innerif_list()
    all_innerif_list.append(formula)

    for eachone in all_innerif_list:
        if eachone.count('IF') != 1:
            continue

        # get_threeparts_IF() reports on the most recent parse, so the
        # candidate itself has to be parsed first.
        try:
            p.parse('=' + eachone)
        except Exception:
            continue

        threeparts = p.get_threeparts_IF()
        condition, true_part, false_part = threeparts[0], threeparts[1], threeparts[2]

        for eachsymbol in ('<', '>', '<=', '>='):
            for left, right in ((true_part, false_part), (false_part, true_part)):
                spellings = (
                    left + eachsymbol + right,
                    left + eachsymbol + '(' + right + ')',
                    '(' + left + ')' + eachsymbol + right,
                    '(' + left + ')' + eachsymbol + '(' + right + ')',
                )
                if condition in spellings:
                    return True

    return False
コード例 #7
0
def deal_with_AND_condition(formula):
    """Split an ``AND(...)`` call or an ``&`` chain into its separate operands.

    The formula is tokenised and the token values are re-assembled into a
    dict ``{0: first operand, 1: second operand, ...}``. A new operand starts
    at every ``,`` seen at nesting depth 1 and at every ``&`` seen at depth 0;
    when an operand is closed, any ``AND`` text inside it is removed.

    While re-assembling, ranges that refer to another sheet get the sheet
    name wrapped in single quotes, text tokens are wrapped in double quotes
    and an empty ``number`` token is written as ``""``.

    Returns False when the formula cannot be parsed, has no tokens in its
    token list, or neither starts with ``AND`` nor contains ``&``. Returns
    ``{0: ''}`` when the parser exposes no token container at all.
    """
    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse('=' + formula)
    except:
        return False

    operands = {0: ''}
    if not parser.tokens:
        return operands

    tokens = parser.tokens.items
    if not tokens:
        return False
    if tokens[0].tvalue != 'AND' and '&' not in formula:
        return False

    slot = 0
    depth = 0
    for tok in tokens:
        value = tok.tvalue
        subtype = tok.tsubtype
        other_sheet_range = '!' in value and subtype == 'range'

        # A closing token belongs to the depth it returns to.
        if subtype == parser.TOK_SUBTYPE_STOP:
            depth -= 1

        argument_comma = depth == 1 and value == ','
        chain_ampersand = depth == 0 and value == '&'
        if argument_comma or chain_ampersand:
            operands[slot] = operands[slot].replace('AND', '')
            slot += 1
            operands[slot] = ''
            continue

        if other_sheet_range:
            sheet_and_cell = value.split('!')
            operands[slot] += '\'' + sheet_and_cell[0] + '\'' + '!' + sheet_and_cell[1]
        elif subtype == 'text':
            operands[slot] += '\"' + value + '\"'
        elif subtype == 'number' and value == '':
            operands[slot] += '\"\"'
        else:
            operands[slot] += value

        # An opening token raises the depth only for the tokens after it.
        if subtype == parser.TOK_SUBTYPE_START:
            depth += 1

    return operands
コード例 #8
0
def redundancy_loop_NEW(formula):
    """Repeatedly strip redundant inner IFs from ``formula`` until nothing changes.

    Spaces are removed first. Each round collects candidate IF fragments from
    ``ForNestedIf.get_para_iflist_second`` (for the formula and for each of
    its three IF parts), rebuilds the condition/value dictionary for the
    current formula and asks ``deal_with_redundancy_oneround`` about every
    candidate. A candidate that yields a result and still occurs in the
    formula is replaced by element 1 of that result.

    Returns the (possibly rewritten) formula, or False when the formula
    cannot be parsed at the start of a round.
    """
    formula = formula.replace(' ', '')

    while True:
        candidates = ForNestedIf.get_para_iflist_second(formula) or []

        parser = ForNestedIf.ExcelParser()
        try:
            parser.parse('=' + formula)
        except:
            return False

        for part in parser.get_threeparts_IF():
            nested = ForNestedIf.get_para_iflist_second(part)
            if nested:
                candidates.extend(nested)

        if not candidates:
            return formula

        simplified = False
        for each in candidates:
            # The dictionary is rebuilt per candidate because the formula may
            # already have been rewritten earlier in this round.
            dic = {}
            get_condition_values_dic(formula, dic)

            outcome = deal_with_redundancy_oneround(each, dic)
            if outcome and each in formula:
                simplified = True
                formula = formula.replace(each, outcome[1])

        if not simplified:
            return formula
コード例 #9
0
def refactor_loop_NEW(formula, dic):
    """Apply ``pattern_match_oneround`` to the inner IFs of ``formula`` until it stops helping.

    Returns False for an empty/None formula. Otherwise spaces are removed and
    rounds are run: each round gathers candidate IF fragments via
    ``ForNestedIf.get_para_iflist_second`` (for the formula and for each of
    its three IF parts) and replaces every candidate for which
    ``pattern_match_oneround(candidate, dic)`` yields a replacement and which
    still occurs in the formula. A round ends early once the rewritten
    formula has fewer than two nested IFs.

    Returns the resulting formula. Parser errors propagate.
    """
    if not formula:
        return False
    formula = formula.replace(' ', '')

    while True:
        candidates = ForNestedIf.get_para_iflist_second(formula) or []

        parser = ForNestedIf.ExcelParser()
        parser.parse('=' + formula)
        for part in parser.get_threeparts_IF():
            nested = ForNestedIf.get_para_iflist_second(part)
            if nested:
                candidates.extend(nested)

        if not candidates:
            return formula

        progressed = False
        for each in candidates:
            replacement = pattern_match_oneround(each, dic)
            if not (replacement and each in formula):
                continue

            progressed = True
            formula = formula.replace(each, replacement)
            parser.parse('=' + formula)
            # Nothing left to flatten in this round once depth drops below 2.
            if parser.get_nested_ifs() < 2:
                break

        if not progressed:
            return formula
コード例 #10
0
def predealwith_formula(formula):
    """Normalise a formula by re-serialising its token stream.

    Surrounding whitespace, all spaces and every ``[#ThisRow],`` marker are
    removed, the result is parsed, and the tokens are written back out:
    opening tokens as ``value(``, closing tokens as ``value)``, text tokens
    wrapped in double quotes, an empty ``number`` token as ``""`` and
    everything else verbatim.

    Returns the rebuilt string (stripped), or False when parsing fails.
    """
    formula = formula.strip().replace(' ', '').replace('[#ThisRow],', '')

    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse('=' + formula)
    except:
        return False

    chunks = []
    depth = 0
    if parser.tokens:
        while parser.tokens.moveNext():
            tok = parser.tokens.current()
            subtype = tok.tsubtype
            closing = subtype == parser.TOK_SUBTYPE_STOP
            opening = subtype == parser.TOK_SUBTYPE_START

            if closing:
                depth -= 1
                # Only reachable for an unmatched closing token: it is then
                # emitted here and once more below.
                if depth == -1:
                    chunks.append(tok.tvalue + ')')

            if opening:
                chunks.append(tok.tvalue + '(')
                depth += 1
            elif closing:
                chunks.append(tok.tvalue + ')')
            elif subtype == 'text':
                chunks.append('\"' + tok.tvalue + '\"')
            elif subtype == 'number' and tok.tvalue == '':
                chunks.append('\"\"')
            else:
                chunks.append(tok.tvalue)

    return ''.join(chunks).strip()
コード例 #11
0
def get_simplified_MAXMIN_pattern(formula):
    """Rewrite an IF that selects the larger/smaller of two values as MAX/MIN.

    Candidates are the inner IFs reported by the parser followed by the
    formula itself; only candidates whose text contains ``IF`` exactly once
    are considered. When the condition equals ``A op B`` (either side
    optionally parenthesised) with ``A``/``B`` being the true and false parts
    in either order and ``op`` one of ``<  >  <=  >=``, the candidate text is
    replaced inside ``formula`` by ``MAX(true,false)`` or ``MIN(true,false)``.

    Returns the rewritten formula, or False when parsing fails or nothing
    matches.
    """
    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse(formula)
    except:
        return False

    pieces = parser.get_all_innerif_list()
    pieces.append(formula)

    def spellings(left, op, right):
        # The four accepted ways of writing "left op right".
        return (left + op + right,
                left + op + '(' + right + ')',
                '(' + left + ')' + op + right,
                '(' + left + ')' + op + '(' + right + ')')

    for piece in pieces:
        if piece.count('IF') != 1:
            continue

        # NOTE(review): the parser is not re-run on ``piece`` here, so these
        # parts presumably still describe the formula parsed above rather
        # than the inner IF being replaced - confirm whether that is intended.
        parts = parser.get_threeparts_IF()
        condition, true_part, false_part = parts[0], parts[1], parts[2]
        max_call = 'MAX(' + true_part + ',' + false_part + ')'
        min_call = 'MIN(' + true_part + ',' + false_part + ')'

        rewritten = ''
        for op in ('<', '>', '<=', '>='):
            picks_larger = op.startswith('>')
            if condition in spellings(true_part, op, false_part):
                rewritten = max_call if picks_larger else min_call
            if condition in spellings(false_part, op, true_part):
                rewritten = min_call if picks_larger else max_call

        if rewritten != '':
            return formula.replace(piece, rewritten)

    return False
コード例 #12
0
def get_para_iflist(formula):
    """Return the parser's list of inner IF parts, or False if none looks nested.

    Input: the formula that should be refactored.
    Output: the list from ``get_para_if()`` when at least one of its entries
    passes the nesting test below; False otherwise, and also False when the
    formula cannot be parsed.

    Nesting test for an entry: the number of ``IF`` occurrences must exceed,
    by more than one, each of the ``IFS``, ``IFERROR`` and ``IFNA`` counts
    taken separately.
    """
    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse('=' + formula)
    except:
        return False

    para_ifs = parser.get_para_if()

    def looks_nested(text):
        return ((text.count('IF') - text.count('IFS')) > 1
                and (text.count('IF') - text.count('IFERROR')) > 1
                and (text.count('IF') - text.count('IFNA')) > 1)

    # Every entry is evaluated (no early exit), then the verdicts are combined.
    verdicts = [looks_nested(entry) for entry in para_ifs]
    if any(verdicts):
        return para_ifs
    return False
コード例 #13
0
def get_condition_values_dic(formula, dic):
    """Record every IF condition of ``formula`` and its two branches into ``dic``.

    ``dic`` is filled in place:

    * ``dic['condition_list']`` - condition texts in visiting order;
    * ``dic[<condition>]``      - ``[true, false, true, false, ...]``, one
      pair per occurrence of that condition.

    When the first token of the formula is not ``IF`` the parser's
    ``get_onlyIFfunction()`` result is analysed instead. Parts containing
    ``IF`` are visited recursively.

    Returns None. Any error stops the traversal silently and leaves whatever
    was already recorded in ``dic``.
    """
    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse('=' + formula)
        if parser.tokens.items[0].tvalue != 'IF':
            parser.parse('=' + parser.get_onlyIFfunction())

        parts = parser.get_threeparts_IF()
        condition = parts[0]
        dic.setdefault('condition_list', []).append(condition)

        true_part = parts[1]
        dic.setdefault(condition, []).append(true_part)
        dic[condition].append(parts[2])

        if 'IF' in formula:
            for part in parts:
                if 'IF' in part:
                    parser.parse('=' + part)
                    get_condition_values_dic(parser.get_onlyIFfunction(), dic)
    except:
        # NOTE(review): callers cannot observe a failure - the caller's dict
        # is left as it was when the error occurred. Confirm whether it was
        # meant to be cleared instead.
        pass
def deal_with_zeroreduce(sourcefilepath, resultfilepath):
    """Append depth statistics for every log line whose refactoring reduced nesting.

    Each non-blank input line must hold exactly 17 ``::``-separated fields
    (path, sheet, old formula, new formula, ten pattern flags, old depth,
    new depth, formula count). Old and new formulas are re-parsed; when the
    nested-IF depth dropped, one CSV row is appended to ``resultfilepath``:

        olddepth,newdepth,depthreduce,proportion,num

    ``proportion`` is ``depthreduce / old_depth`` as a real fraction. A
    progress counter is printed every 1000 processed lines.

    Blank lines are skipped; earlier the first blank line ended processing
    because it was indistinguishable from end-of-file.

    Raises ValueError for a line with the wrong number of fields or a
    non-numeric formula count; parser errors propagate. Both files are
    closed in every case.
    """
    count = 0
    with open(resultfilepath, 'a') as writefile, open(sourcefilepath) as readfile:
        for rawline in readfile:
            newline = rawline.strip()
            if not newline:
                continue

            count += 1
            if count % 1000 == 0:
                print(count)

            (path, sheetname, oldformula, newformula, tfand, tfor, tflookup,
             tfidlookup, tfchoose, tfmatch, tfifs, tfmaxmin, tfuseless,
             ifredun, olddepth, newdepth, num) = newline.split('::')
            float(num)  # fail fast on a malformed count, as before

            p = ForNestedIf.ExcelParser()
            p.parse('=' + oldformula)
            old_depth = p.get_nested_ifs()
            p.parse('=' + newformula)
            new_depth = p.get_nested_ifs()

            depthreduce = old_depth - new_depth
            if depthreduce > 0:
                # float() forces true division; int/int would floor to 0
                # under Python 2 for every partial reduction.
                proportion = float(depthreduce) / old_depth
                writefile.write(olddepth + ',' + newdepth + ',' + str(depthreduce)
                                + ',' + str(proportion) + ',' + num + '\n')
コード例 #15
0
def get_simplified_MATCH_pattern(formula, threepartlist):
    """Build an ``IFERROR(MATCH(...))`` replacement for a chain of ``x=value`` IFs.

    ``threepartlist`` holds three parallel lists: condition texts, true-branch
    texts and false-branch texts. Each condition is split on ``=``; the text
    after the first ``=`` must be parseable on its own, otherwise False is
    returned. ``formula`` itself is not used.

    The compared values become the MATCH array. When the first two true
    values are ``'1'`` and one more than that, the MATCH position is used
    directly; otherwise it is scaled as ``(pos-1)*(t1-t0)+t0``. The fallback
    of IFERROR is the last false-branch text.

    Raises IndexError/ValueError when fewer than two true values are given
    or they are not numeric.
    """
    conditions = threepartlist[0]
    true_values = threepartlist[1]
    false_values = threepartlist[2]

    parser = ForNestedIf.ExcelParser()
    lookup_refs = []
    compared_values = []

    for condition in conditions:
        sides = condition.split('=')
        left, right = sides[0], sides[1]
        try:
            parser.parse('=' + right)
        except:
            # The compared value is not a parseable expression.
            return False
        compared_values.append(right)
        lookup_refs.append(left)

    array_body = ','.join(compared_values)
    step = float(true_values[1]) - float(true_values[0])

    match_call = 'MATCH(' + lookup_refs[0] + ',{' + array_body + '},0)'
    fallback = false_values[-1]

    if step == 1.0 and true_values[0] == '1':
        return 'IFERROR(' + match_call + ',' + fallback + ')'

    return ('IFERROR((' + match_call + '-1)*(' + true_values[1] + '-'
            + true_values[0] + ')+' + true_values[0] + ',' + fallback + ')')
コード例 #16
0
def get_simplified_USELESS_pattern(formula):
    """Remove the first IF whose condition only compares its own two branches.

    The inner IFs reported by the parser, followed by the formula itself, are
    examined in order. An IF of the form ``IF(a=b,a,b)`` or ``IF(b=a,a,b)``
    is useless: when the condition holds both branches are equal, and when it
    does not the false branch is returned - so the IF always evaluates to its
    false branch. The IF text is therefore replaced by the false branch.
    (Earlier the ``IF(a=b,a,b)`` spelling was replaced by the true branch,
    which gives a different result whenever ``a`` and ``b`` differ.)

    Returns ``formula`` with that IF replaced, or False when no such IF is
    found or the formula (or an examined piece) cannot be parsed.
    """
    p = ForNestedIf.ExcelParser()
    try:
        p.parse('=' + formula)
    except Exception:
        return False

    all_innerif_list = p.get_all_innerif_list()
    all_innerif_list.append(formula)

    for eachone in all_innerif_list:
        try:
            p.parse('=' + eachone)
        except Exception:
            return False

        threeparts = p.get_threeparts_IF()
        condition, true_part, false_part = threeparts[0], threeparts[1], threeparts[2]

        if condition in (false_part + '=' + true_part, true_part + '=' + false_part):
            replacestring = 'IF(' + condition + ',' + true_part + ',' + false_part + ')'
            return formula.replace(replacestring, false_part)

    return False
コード例 #17
0
def deal_with_redundancy_oneround(formula,dic):
    """Try one round of redundant-IF removal on ``formula``.

    ``dic`` is read as a mapping with a ``'condition_list'`` entry (a list of
    condition texts) and, per condition text, a list whose elements 0 and 1
    are used as the true/false parts of its first occurrence and whose later
    pairs are used as further occurrences of the same condition.

    Three kinds of redundancy are looked for, in this order:

    1. the same condition occurring again inside the branches of its own
       first occurrence;
    2. a condition whose logical opposite (``get_oppo_logicformula``) is
       also a recorded condition and whose IF sits inside the opposite's
       branches;
    3. a combined condition (``deal_with_AND_condition``) whose operands are
       all recorded conditions themselves.

    Returns ``[original_formula, rewritten_formula]`` when a rewrite changed
    the formula, otherwise False (also when ``dic`` has no
    ``'condition_list'`` entry).
    """

    originalfomula = formula

    contain = False
    condition_list = []
    returnlist = []
    p = ForNestedIf.ExcelParser()
    # print '--------'
    if 'condition_list' not in dic:
        return False
    condition_list = dic['condition_list']


    # Element 0 of the result is always the untouched input formula.
    returnlist.append(formula)

    # Opposites of every plain (non-IF, non-empty) condition key.
    oppo_list = []
    # dic_oper_oppo = {}
    # dic_oper_oppo['='] = ['!=', '<>']
    # dic_oper_oppo['>='] = ['<']
    # dic_oper_oppo['<='] = ['>']
    # dic_oper_oppo['>'] = ['<=']
    # dic_oper_oppo['<'] = ['>=']
    # dic_oper_oppo['!='] = ['=']
    # dic_oper_oppo['<>'] = ['=']




    # --- Step 1: repeated occurrences of the same condition ----------------
    for key in dic:
        # Substring test: skips the bookkeeping entry (and any key that
        # happens to contain the text 'condition_list').
        if 'condition_list' in key:
            continue
        if len(dic[key]) > 2:
            ccc = 0

            # Walks the later (true, false) pairs of this condition.
            # NOTE(review): the bound is len/2 while the index advances by 2,
            # so with 8 or more recorded values the last pair(s) are never
            # visited - confirm whether that is intended.
            while ccc<(len(dic[key])/2):
                ccc+=2

                redun = 'IF('+key+','+dic[key][ccc]+','+dic[key][ccc+1]+')'
                # An empty false part leaves a dangling comma; drop it.
                if ',)' in redun:
                    redun = redun.replace(',)', ')')

                if redun in dic[key][0]:

                    # print '1'
                    # Repeated IF inside the first true part: keep its true part.
                    contain = True
                    newstring = dic[key][0].replace(redun,dic[key][ccc])
                    formula=formula.replace(dic[key][0],newstring)
                elif redun in dic[key][1]:
                    # print '2'
                    # Repeated IF inside the first false part: keep its false part.
                    contain = True
                    newstring = dic[key][1].replace(redun,dic[key][ccc+1])
                    formula = formula.replace(dic[key][1],newstring)


        if 'IF' not in key and (key != ''):
            newkey = get_oppo_logicformula(key)
            oppo_list.append(newkey)

            # if p.tokens.items[1].tvalue != '' and p.tokens.items[0].ttype != 'function':
            #     for i in dic_oper_oppo[p.tokens.items[1].tvalue]:
            #         if p.tokens.items[2].tvalue == '':
            #             oppo_list.append(p.tokens.items[0].tvalue + i + '\"\"')
            #         else:
            #             substring = ''
            #             cccc = 0
            #             while(cccc < len(p.tokens.items)-2):
            #                 substring += p.tokens.items[cccc+2].tvalue
            #                 cccc += 1
            #             oppo_list.append(p.tokens.items[0].tvalue + i + substring)
    # --- Step 2: a condition and its opposite are both present -------------
    if len(set(condition_list).intersection(oppo_list)) != 0:
        # print '3'

        for i in oppo_list:
            if i in condition_list:
                jone = ''

                # jone is the opposite of i, i.e. the other condition of the pair.
                jone = get_oppo_logicformula(i)
                if jone not in dic:
                    continue
                redun = 'IF(' + jone + ',' + dic[jone][0] + ',' + dic[jone][1] + ')'


                if ',)' in redun:
                    redun = redun.replace(',)',')')


                if redun in dic[i][0]:
                    # Inside i's true part jone is replaced by its false part.
                    newstring = dic[i][0].replace(redun,dic[jone][1])
                    formula = formula.replace(dic[i][0], newstring)
                    contain = True
                elif redun in dic[i][1]:
                    # Inside i's false part jone is replaced by its true part.
                    newstring = dic[i][1].replace(redun,dic[jone][0])
                    formula = formula.replace(dic[i][1], newstring)
                    contain = True
            # if contain:
            #     returnlist.append(formula)
                # return returnlist
    isredun = False
    TFlist = []
    # --- Step 3: combined conditions ----------------------------------------
    # Look for the first condition that deal_with_AND_condition can split and
    # whose operands are all recorded conditions themselves.
    each = ''
    andboolean = True

    for eachcondition_list in condition_list:
        conditionresults = deal_with_AND_condition(eachcondition_list)
        if conditionresults:
            for each in conditionresults:
                if conditionresults[each] not in condition_list:
                    # NOTE(review): andboolean is never reset to True, so one
                    # failing condition also disqualifies all later ones.
                    andboolean = False

            if andboolean:

                isredun =  True
                # From here on ``each`` is the text of the combined condition.
                each = eachcondition_list

                break


    # No usable combined condition: report the outcome of steps 1 and 2.
    if not isredun:
        if contain and formula != originalfomula:
            returnlist.append(formula)
            if len(returnlist) == 2:
                return returnlist
        else:
            return False




    # For every operand of the combined condition, work out whether the
    # combined IF sits in the operand's true ('T') or false ('F') part.
    for eachconditionresults in conditionresults:

        # Rebinds the loop variable from the dict key to the operand text.
        eachconditionresults = conditionresults[eachconditionresults]
        oppo_eachconditionresults = get_oppo_logicformula(eachconditionresults)



        if eachconditionresults in dic:


            if each in dic[eachconditionresults][0]:
                TFlist.append('T')
            elif each in dic[eachconditionresults][1]:

                TFlist.append('F')
            elif eachconditionresults in dic[each][0]:

                # The operand is tested again inside the combined IF's true
                # part: replace that inner IF with its own true part.
                redun = 'IF(' + eachconditionresults + ',' + dic[eachconditionresults][0] + ',' + dic[eachconditionresults][1] + ')'
                newstring = dic[each][0].replace(redun,dic[eachconditionresults][0])
                formula = formula.replace(dic[each][0],newstring)
                if formula != originalfomula:
                    returnlist.append(formula)
                    if len(returnlist) == 2:

                        return returnlist


        elif oppo_eachconditionresults != '':
            # Same checks using the operand's opposite; T and F swap roles.
            if oppo_eachconditionresults in dic:
                if each in dic[oppo_eachconditionresults][0]:
                    TFlist.append('F')
                elif each in dic[oppo_eachconditionresults][1]:
                    TFlist.append('T')
                elif oppo_eachconditionresults in dic[each][0]:
                    redun = 'IF(' + oppo_eachconditionresults + ',' + dic[oppo_eachconditionresults][0] + ',' + dic[oppo_eachconditionresults][1] + ')'
                    newstring = dic[each][0].replace(redun,dic[oppo_eachconditionresults][0])
                    formula = formula.replace(dic[each][0],newstring)
                    if formula != originalfomula:
                        returnlist.append(formula)
                        if len(returnlist) == 2:
                            return returnlist

            else:
                return False


        # Collapse the combined IF itself: any 'F' verdict selects its false
        # part, otherwise a 'T' verdict selects its true part.
        redun = 'IF(' + each + ',' + dic[each][0] + ',' + dic[each][1] + ')'
        if ',)' in redun:
            redun = redun.replace(',)',')')

        if 'F' in TFlist:
            contain = True
            formula = formula.replace(redun,dic[each][1])
            if formula != originalfomula:
                returnlist.append(formula)
        elif 'T' in TFlist:
            contain = True
            formula = formula.replace(redun, dic[each][0])
            if formula != originalfomula:
                returnlist.append(formula)

        if returnlist and len(returnlist) == 2:

            return returnlist
    return False
コード例 #18
0
def redundancy_loop(formula, stopboolean,canrefactorboolean):
    """Run redundancy removal over ``formula`` and over its three IF parts.

    Spaces are removed from ``formula`` first. The body only runs when
    ``stopboolean`` is truthy; ``canrefactorboolean`` records whether any
    replacement was made (it may already be True on entry).

    Returns the (possibly rewritten) formula when ``canrefactorboolean`` is
    truthy at the end, otherwise False.

    NOTE(review): the result of the recursive call near the end is
    discarded, so the value returned comes from this invocation only -
    confirm whether ``return redundancy_loop(...)`` was intended.
    """
    formula = formula.replace(' ','')
    if stopboolean:

        para_if_list =ForNestedIf.get_para_iflist_second(formula)


        # Nothing to inspect: hand back the formula only if the caller said
        # a refactoring had already happened.
        if not para_if_list:
            stopboolean = True
            if canrefactorboolean:
                return formula
            else:
                return False

        for each in para_if_list:
            dic = {}

            get_condition_values_dic(formula, dic)
            # dic was created as a dict just above, so this comparison with
            # an empty string cannot be true.
            if dic == '':
                return False


            # NOTE(review): the whole formula (not ``each``) is analysed
            # here, while ``each`` is what gets replaced below.
            returnlist = deal_with_redundancy_oneround(formula, dic)





            if returnlist:
                stopboolean = False
                if each in formula:
                    canrefactorboolean = True

                    formula = formula.replace(each,returnlist[1])



        # Second pass: treat each of the formula's three IF parts on its own.
        p = ForNestedIf.ExcelParser()
        p.parse('='+formula)
        threeparts_list = p.get_threeparts_IF()


        for eachthree in threeparts_list:


            dic = {}

            get_condition_values_dic(eachthree, dic)
            # Same as above: never true for a dict.
            if dic == '':
                return False

            returnresultspart = deal_with_redundancy_oneround(eachthree, dic)

            if returnresultspart:
                stopboolean = False
                if eachthree in formula:
                    canrefactorboolean = True

                    formula = formula.replace(eachthree, returnresultspart[1])

        if canrefactorboolean:
            # Return value intentionally or accidentally ignored - see the
            # note in the docstring.
            redundancy_loop(formula, stopboolean,canrefactorboolean)
    if canrefactorboolean:
        return formula
    else:
        return False
コード例 #19
0
def _split_workbook_path(path):
    """Return ``(workbook, sheet)`` taken from a ``<workbook>.xlsm|.xlsx<sheet>.txt`` path, or None."""
    names = None
    try:
        # '.xlsx' is checked last so it wins when both markers are present.
        for ext in ('xlsm', 'xlsx'):
            marker = '.' + ext
            if marker in path:
                sheet_start = path.index(marker) + 5
                names = (path.split(ext)[0] + ext,
                         path[sheet_start:path.index('.txt')])
    except ValueError:
        # No '.txt' suffix to delimit the sheet name.
        return None
    return names


def main_for_ALL(numstring, start_count=4178):
    """Refactor every formula listed in ``Total-Matrix-<numstring>.txt``.

    Each non-blank input line is ``path::formula::flags`` where ``flags`` is
    a comma-separated list of ``True``/``False`` pattern markers. Lines whose
    first flag is ``True`` are skipped. For the others the first matching
    flag (checked in the order AND, OR, CHOOSE, MATCH, LOOKUP, ID-LOOKUP,
    MAXMIN, USELESS, IFS) selects the simplification function, and a record

        workbook::sheet::formula::newformula::flags::LABEL

    is appended to ``Refactored-<numstring>.txt``. A line is silently skipped
    when the formula cannot be parsed, no flag matches, or the selected
    simplification fails.

    Args:
        numstring: suffix of the input/output file names.
        start_count: 1-based index (among non-blank lines) of the first line
            to process; earlier lines are skipped. Defaults to the offset
            that used to be hard-coded.

    Changes from the earlier behaviour: the output file is opened once and
    always closed (it used to be reopened for every line and rarely closed),
    and a line whose workbook/sheet cannot be derived from its path is
    skipped after printing the path instead of reusing the names of an
    earlier line. The output file is now created even if no record is
    written.

    Raises ValueError for a line that does not have exactly three ``::``
    fields; errors from ``get_list_threeparts`` propagate.
    """
    p = ForNestedIf.ExcelParser()

    filepath = 'D:\\Users\\v-jizha4\\results\\metrix-results\\Total-Matrix-' + numstring + '.txt'
    write_filename = 'D:\\Users\\v-jizha4\\results\\after-refactor-results\\Refactored-' + numstring + '.txt'

    count = 0
    with open(filepath) as readfile, open(write_filename, 'a') as writefile:
        for eachline in readfile:
            i = eachline.strip()
            if i == '':
                continue

            i = i.replace('[#This Row],', '')
            count += 1
            if count < start_count:
                continue

            path, formula, truefalse = i.split('::')
            truefalse_list = truefalse.split(',')

            names = _split_workbook_path(path)
            if names is None:
                print(path)

            try:
                p.parse(formula)
            except Exception:
                continue

            if count % 1000 == 0:
                print('Matrix-' + numstring + ': ' + str(count))

            threepartlist = [[], [], []]
            p.get_list_threeparts(formula, threepartlist)

            if truefalse_list[0] == 'True' or names is None:
                continue
            excelname, sheetname = names

            try:
                if truefalse_list[1] == 'True':
                    label = 'AND'
                    newformula = get_simplified_AND_pattern(formula, threepartlist)
                elif truefalse_list[2] == 'True':
                    label = 'OR'
                    newformula = get_simplified_OR_pattern(formula, threepartlist)
                elif truefalse_list[5] == 'True':
                    label = 'CHOOSE'
                    newformula = get_simplified_CHOOSE_pattern(formula, threepartlist)
                elif truefalse_list[6] == 'True':
                    label = 'MATCH'
                    newformula = get_simplified_MATCH_pattern(formula, threepartlist)
                elif truefalse_list[3] == 'True':
                    label = 'LOOKUP'
                    newformula = get_simplified_LOOKUP_pattern(formula, excelname, sheetname, threepartlist)
                elif truefalse_list[4] == 'True':
                    # Indirect lookups are logged under the same label.
                    label = 'LOOKUP'
                    newformula = get_simplified_ID_LOOKUP_pattern(formula, excelname, sheetname, threepartlist)
                elif truefalse_list[8] == 'True':
                    # NOTE(review): confirm the two-argument call matches the
                    # signature of get_simplified_MAXMIN_pattern in use.
                    label = 'MAXMIN'
                    newformula = get_simplified_MAXMIN_pattern(formula, threepartlist)
                elif truefalse_list[9] == 'True':
                    # NOTE(review): same question for the USELESS pattern.
                    label = 'USELESS'
                    newformula = get_simplified_USELESS_pattern(formula, threepartlist)
                elif truefalse_list[10] == 'True':
                    label = 'IFS'
                    newformula = get_simplified_IFS_pattern(formula, threepartlist)
                else:
                    continue

                # A failed simplification yields False; the concatenation
                # then raises and the line is skipped without writing.
                record = (excelname + '::' + sheetname + '::' + formula + '::'
                          + newformula + '::' + truefalse + '::' + label + '\n')
                writefile.write(record)
            except Exception:
                # Best effort: too few flags, a missing helper or a failed
                # simplification only cost this one line.
                continue
コード例 #20
0
def generate_classifyequal():
    """Sort the formulas of CONEQUAL.txt into CHOOSE / inner-AND / other files.

    Input lines containing ``D:\\Users`` are collected as file names; every
    other (stripped) line is a formula. The n-th formula is paired with the
    n-th file name, classified with ``classify_equal_formulas`` and written
    (file name line, then formula line) to:

    * ``CHOOSE.txt``       for ``'ischoose'``,
    * ``innerand_new.txt`` for ``'isand'``,
    * ``newform_new.txt``  for anything else.

    For the first two classes a short report with running counts is printed.
    All three output files are truncated first.

    Raises IndexError when there are fewer file-name lines than formulas;
    parser errors from ``classify_equal_formulas`` propagate. All files are
    closed in every case.
    """
    base_dir = 'D:\\Users\\v-jizha4\\ExcelExp\\expAnalyResults\\totalsplits\\'

    with open(base_dir + 'CONEQUAL.txt') as readfile:
        lines = readfile.readlines()

    filenamelist = []
    analyzelist = []
    for eachline in lines:
        eachline = eachline.strip()
        # Backslash is escaped explicitly; the value is the same text as before.
        if 'D:\\Users' in eachline:
            filenamelist.append(eachline)
        else:
            analyzelist.append(eachline)

    choose_count = 0
    inner_and_count = 0

    with open(base_dir + 'CHOOSE.txt', 'w') as choose_write, \
            open(base_dir + 'innerand_new.txt', 'w') as andwrite, \
            open(base_dir + 'newform_new.txt', 'w') as otherwrite:
        for count, i in enumerate(analyzelist, 1):
            source_name = filenamelist[count - 1]
            # classify_equal_formulas parses the formula itself.
            result = classify_equal_formulas(i)

            if result == 'ischoose':
                choose_count += 1
                print('-----------------------')
                print(source_name)
                print(i)
                # Double space kept so the console output is unchanged.
                print('CHOOSE count:  %d' % choose_count)
                print('total count:  %d' % count)
                target = choose_write
            elif result == 'isand':
                inner_and_count += 1
                print('-----------------------')
                print(source_name)
                print(i)
                print('Inner and count:  %d' % inner_and_count)
                print('total count:  %d' % count)
                target = andwrite
            else:
                target = otherwrite

            target.write(source_name + '\n')
            target.write(i + '\n')
コード例 #21
0
def return_list_ifis_equal(formula, threepartlist):
    """Classify a chain of ``X=value`` IF conditions by candidate rewrite.

    Every condition must contain ``=`` without being a ``<=``/``>=``
    comparison, and the text before the first ``=`` must be identical across
    all conditions.  The subtype of the first token parsed from the text
    after the ``=`` and from each true-branch value then selects the
    candidate rewrite.

    Args:
        formula: The formula under analysis.  It is not read by the
            classification; it is kept for interface compatibility.
        threepartlist: Sequence whose item 0 is the list of condition
            strings and whose item 1 is the list of true-branch strings.

    Returns:
        ``False`` when a condition is not a plain equality, when the
        left-hand sides differ (or there are no conditions), or when a
        right-hand side / true-branch value cannot be parsed.  Otherwise a
        dict with boolean keys ``'choose'``, ``'match'``, ``'lookup'`` and
        ``'idlookup'``; ``'idlookup'`` is always ``True`` in a returned dict
        and at most one of the other three is set.
    """
    # order: choose, match, lookup, idlookup
    flags = {
        'choose': False,
        'match': False,
        'lookup': False,
        'idlookup': False,
    }

    p = ForNestedIf.ExcelParser()
    condition_part_list = threepartlist[0]
    true_part_list = threepartlist[1]

    before_equal_list = []
    after_equal_list = []
    after_equal_type_list = []

    for each in condition_part_list:
        if '=' not in each or '<=' in each or '>=' in each:
            return False
        flags['idlookup'] = True

        pieces = each.split('=')
        before, after = pieces[0], pieces[1]

        # The token lookup is guarded together with the parse (as in the
        # true-part loop below) so that a right-hand side yielding no usable
        # token is reported as "no match" instead of raising.
        try:
            p.parse('=' + after)
            after_type = p.tokens.items[0].tsubtype
        except Exception:
            return False

        after_equal_type_list.append(after_type)
        after_equal_list.append(after)
        before_equal_list.append(before)

    # The part before the equal sign has to be the same in every condition.
    if len(set(before_equal_list)) != 1:
        return False

    true_part_type_list = []
    for each in true_part_list:
        try:
            p.parse('=' + each)
            true_part_type_list.append(p.tokens.items[0].tsubtype)
        except Exception:
            return False

    after_types = set(after_equal_type_list)
    true_types = set(true_part_type_list)

    # Mixed types on either side: can only be an indirect lookup.
    if len(true_types) != 1 or len(after_types) != 1:
        return flags

    if after_types == {'range'}:
        if (true_types == {'number'}
                and furtherAnalysis.is_arithmetic(true_part_list)):
            flags['match'] = True
        elif true_types == {'range'}:
            flags['lookup'] = True
    elif after_types == {'number'}:
        if furtherAnalysis.is_arithmetic(after_equal_list):
            if (true_types == {'number'}
                    and furtherAnalysis.is_arithmetic(true_part_list)):
                flags['match'] = True
            elif true_types == {'range'} or true_types == {'text'}:
                flags['choose'] = True
    elif after_types == {'text'}:
        if (true_types == {'number'}
                and furtherAnalysis.is_arithmetic(true_part_list)):
            flags['match'] = True

    return flags
コード例 #22
0
def whole_process_loop(dic, filepath, total_or_unique):
    """Refactor every nested-IF formula listed in the ``-original`` file.

    Reads ``<filepath>nestif-<total_or_unique>-original.txt`` line by line.
    Each line is ``excelfile::formula`` or ``excelfile::formula::num``; lines
    with any other field count, and formulas containing ``#REF``, ``#DIV/0``
    or ``#VALUE``, are skipped.  Each remaining formula is pre-processed,
    passed through the redundancy pass and the refactoring loop, and the
    outcome is appended to one of three files next to the input:

    * ``nestif-<total_or_unique>-refactored.txt`` - the formula changed and
      the IF nesting depth changed (or the new formula could not be
      re-parsed, in which case no depths are appended);
    * ``nestif-zeroreduce-<total_or_unique>.txt`` - the formula changed but
      the nesting depth did not;
    * ``nestif-<total_or_unique>-fail.txt`` - no new formula was produced.

    Args:
        dic: Not read.  A fresh flag dictionary is built for every formula;
            the parameter is kept for interface compatibility.
        filepath: Prefix (typically a directory ending in a separator) that
            is prepended to every file name.
        total_or_unique: Tag inserted into the file names.

    Returns:
        None.  The number of refactored formulas is printed at the end.
    """
    original_formu_path = filepath + 'nestif-' + total_or_unique + '-original.txt'
    refactor_result_path = filepath + 'nestif-' + total_or_unique + '-refactored.txt'
    failpath = filepath + 'nestif-' + total_or_unique + '-fail.txt'
    nodepthreducefile = filepath + 'nestif-zeroreduce-' + total_or_unique + '.txt'

    # Column order of the True/False flags written to the result files.
    flag_order = ('and', 'or', 'lookup', 'idlookup', 'choose', 'match',
                  'ifs', 'maxmin', 'useless', 'redun')

    count = 0

    # ``with`` guarantees all four handles are closed, including the input
    # file and including the case where processing raises part-way through.
    with open(original_formu_path) as readfile, \
            open(refactor_result_path, 'w') as writesuccess, \
            open(failpath, 'w') as writefail, \
            open(nodepthreducefile, 'w') as zeroreduce:

        for thisline in readfile:
            fields = thisline.strip().split('::')
            if len(fields) not in (2, 3):
                print('1============================' + thisline)
                continue
            # An optional third field is accepted but not used.
            excelfilename, formula = fields[0], fields[1]

            if '#REF' in formula or '#DIV/0' in formula or '#VALUE' in formula:
                print('2============================')
                continue

            formula = predealwith_formula(formula)
            if not formula:
                print('3============================' + thisline)
                continue
            originalformu = formula

            print(formula)
            flags = dict.fromkeys(flag_order, False)

            reduced = dealWithRudunPatterns.redundancy_loop_NEW(formula)
            if reduced != formula:
                flags['redun'] = True
                formula = reduced

            try:
                newformula = refactor_loop_NEW(formula, flags)
            except Exception:
                # Best effort: a formula the refactoring loop cannot handle
                # is dropped without being written to any output file.
                continue

            if not newformula or newformula == originalformu:
                writefail.write(excelfilename + '::' + originalformu + '\n')
                continue

            # Progress indicator, printed before the counter is advanced.
            if count % 100 == 0:
                print(count)
            count += 1

            truefalsestring = '::'.join(str(flags[name]) for name in flag_order)
            record = '::'.join(
                (excelfilename, originalformu, newformula, truefalsestring))

            p = ForNestedIf.ExcelParser()
            p.parse('=' + originalformu)
            olddepth = p.get_nested_ifs()
            try:
                p.parse('=' + newformula)
            except Exception:
                # The new formula cannot be parsed, so no depth comparison
                # is possible; record it as a success without depths.
                writesuccess.write(record + '\n')
                continue
            newdepth = p.get_nested_ifs()

            record_with_depth = (
                record + '::' + str(olddepth) + '::' + str(newdepth) + '\n')
            if olddepth - newdepth == 0:
                zeroreduce.write(record_with_depth)
            else:
                writesuccess.write(record_with_depth)

    print(count)
コード例 #23
0
def pattern_match_oneround(formula, dic):
    """Apply the first simplification pattern that matches an IF formula.

    Patterns are tried in a fixed order: AND, OR, "other OR", the equality
    family (CHOOSE, MATCH, LOOKUP, indirect LOOKUP), MAX/MIN, useless IF and
    finally IFS.  The first one that yields a truthy result wins and its key
    is recorded through ``modify_truefalsedic``.

    Args:
        formula: An IF formula without the leading ``=``.
        dic: Flag dictionary handed to ``modify_truefalsedic`` together with
            the key of the pattern that matched.

    Returns:
        The new formula if a pattern exists, otherwise ``False`` (also when
        the formula cannot be parsed).
    """
    p = ForNestedIf.ExcelParser()
    try:
        p.parse('=' + formula)
    except Exception:
        return False

    threepartlist = [[], [], []]
    p.get_list_threeparts(formula, threepartlist)

    # NOTE(review): this branch can only trigger if get_list_threeparts
    # replaces the true/false entries with empty strings - confirm.
    if threepartlist[1] == '' and threepartlist[2] == '':
        formula = threepartlist[0]
        try:
            p.parse('=' + formula)
        except Exception:
            return False

    newformula = getReducedFormulas.get_simplified_AND_pattern(formula, threepartlist)
    if newformula:
        modify_truefalsedic(dic, 'and')
        return newformula

    newformula = getReducedFormulas.get_simplified_OR_pattern(formula, threepartlist)
    if newformula:
        modify_truefalsedic(dic, 'or')
        return newformula

    newformula = return_true_ifis_otherOR(formula, threepartlist)
    if newformula:
        modify_truefalsedic(dic, 'or')
        return newformula

    # Classify once and reuse the result.  The equality family is only
    # attempted when the text 'IF' occurs more than three times in the
    # formula (a plain substring count).
    dic_small = return_list_ifis_equal(formula, threepartlist)
    if dic_small and formula.count('IF') > 3:
        # Order matters: choose, match, lookup, idlookup.
        equality_rewrites = (
            ('choose', getReducedFormulas.get_simplified_CHOOSE_pattern),
            ('match', getReducedFormulas.get_simplified_MATCH_pattern),
            ('lookup', getReducedFormulas.get_simplified_LOOKUP_pattern),
            ('idlookup', getReducedFormulas.get_simplified_ID_LOOKUP_pattern),
        )
        for key, simplify in equality_rewrites:
            if not dic_small[key]:
                continue
            newformula = simplify(formula, threepartlist)
            if newformula:
                modify_truefalsedic(dic, key)
                return newformula

    newformula = getReducedFormulas.get_simplified_MAXMIN_pattern(formula)
    if newformula:
        modify_truefalsedic(dic, 'maxmin')
        return newformula

    newformula = getReducedFormulas.get_simplified_USELESS_pattern(formula)
    if newformula:
        modify_truefalsedic(dic, 'useless')
        return newformula

    newformula = getReducedFormulas.get_simplified_IFS_pattern(formula, threepartlist)
    if newformula:
        modify_truefalsedic(dic, 'ifs')
        return newformula

    return False