def see_if_iserror():
    """Print how many formulas in CONEQUAL.txt are flagged by ``for_iserror``.

    Lines of the input file that contain the workbook path prefix are
    treated as file-name markers and skipped; every other line is taken to
    be a formula (without the leading '='), parsed, and handed to
    ``for_iserror`` together with its depth/token dictionary.
    """
    p = ForNestedIf.ExcelParser()
    filenamestring = 'CONEQUAL.txt'
    filepath = 'D:\\Users\\v-jizha4\\ExcelExp\\expAnalyResults\\totalsplits\\' + filenamestring
    # Read everything first so the handle is released before parsing starts
    # (the original never closed the file).
    with open(filepath) as readfile:
        lines = [eachline.strip() for eachline in readfile]
    analyzelist = [eachline for eachline in lines if 'D:\\Users' not in eachline]
    count = 0
    for i in analyzelist:
        p.parse('=' + i)
        dic = p.get_dic_depth_token()
        if for_iserror(i, dic):
            count += 1
    print(count)
def return_true_ifis_useless(formula):
    """Tell whether *formula* contains an IF whose condition is `true=false`.

    Every inner IF reported by the parser, plus the formula itself, is
    parsed in turn.  Returns True as soon as one of them has a condition
    that is textually ``<true part>=<false part>`` (in either order), and
    False if none does or if anything fails to parse.
    """
    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse('=' + formula)
    except:
        return False

    candidates = parser.get_all_innerif_list()
    candidates.append(formula)
    if len(candidates) == 0:
        return False

    for candidate in candidates:
        try:
            parser.parse('=' + candidate)
        except:
            return False
        parts = parser.get_threeparts_IF()
        forward = parts[1] + '=' + parts[2]
        if forward == parts[0]:
            return True
        backward = parts[2] + '=' + parts[1]
        if backward == parts[0]:
            return True
    return False
def get_simplified_CHOOSE_pattern(formula, threepartlist):
    """Rewrite an `x=k0 / x=k1 / ...` IF chain as ``IFERROR(CHOOSE(...), default)``.

    Parameters:
        formula: the original formula text (unused, kept for the common
            ``get_simplified_*`` signature).
        threepartlist: ``[conditions, true_parts, false_parts]`` as lists of
            strings; each condition is expected to look like ``lhs=rhs``.

    Returns:
        The CHOOSE-based formula string, or False when the inputs cannot be
        turned into one (fewer than two conditions, a condition without
        '=', no false part, or a right-hand side the parser rejects).

    The CHOOSE index is ``(lhs - rhs0) / (rhs1 - rhs0) + 1``: it maps the
    compared value onto 1, 2, 3... using the first two right-hand sides as
    start and step.
    """
    condition_part_list = threepartlist[0]
    true_part_list = threepartlist[1]
    false_part_list = threepartlist[2]

    # The index expression needs two right-hand sides and the result needs a
    # default; the original raised IndexError here instead of reporting
    # "no match" like the other failure paths.
    if len(condition_part_list) < 2 or not false_part_list:
        return False

    before_equal_list = []
    after_equal_list = []
    for each in condition_part_list:
        pieces = each.split('=')
        if len(pieces) < 2:
            return False
        before_equal_list.append(pieces[0])
        after_equal_list.append(pieces[1])

    p = ForNestedIf.ExcelParser()
    for after in after_equal_list:
        try:
            p.parse('=' + after)
        except Exception:
            return False

    choose_string = ','.join(true_part_list)
    initial = ('(' + before_equal_list[0] + '-' + after_equal_list[0] + ')/('
               + after_equal_list[1] + '-' + after_equal_list[0] + ')')
    final = ('IFERROR(CHOOSE(' + str(initial) + '+1 ,' + choose_string + '),'
             + false_part_list[-1] + ')')
    return final
def classify_equal_formulas(formula):
    """Classify an equality-based nested IF by the shape of its true values.

    Returns one of the strings:
        'ischoose'  - all true values are numeric and ``is_arithmetic``
                      accepts them;
        ' '         - numeric, not arithmetic, with duplicated true values;
        'isand'     - non-numeric (or a failure while classifying) with
                      duplicated true values;
        'isnewform' - everything else.

    The numeric list is printed as a side effect.
    """
    parser = ForNestedIf.ExcelParser()
    parser.parse('=' + formula)
    depth_tokens = parser.get_dic_depth_token()
    equal_info = parser.get_dic_for_equal(depth_tokens)
    equal_info[0]  # condition/value mapping; looked up as in the original but unused
    true_values = equal_info[3]

    try:
        numeric_values = [float(item) for item in true_values]
        print(numeric_values)
        if is_arithmetic(numeric_values):
            return 'ischoose'
        has_duplicates = len(set(true_values)) < len(true_values)
        # NOTE(review): a blank string (not 'isand') is returned here;
        # possibly intentional to keep numeric duplicates out of the
        # 'isand' bucket - confirm with the caller.
        return ' ' if has_duplicates else 'isnewform'
    except:
        has_duplicates = len(set(true_values)) < len(true_values)
        return 'isand' if has_duplicates else 'isnewform'
def get_final_func_result(sourcefilepath, resultfilepath):
    """Convert '::'-separated refactoring records into a CSV summary.

    Each non-blank source line must have exactly 15 '::'-separated fields
    (workbook, old formula, new formula, ten pattern flags, old depth, new
    depth); other lines are skipped.  For every record the nesting depth of
    both formulas is recomputed with the parser and one CSV row is written:

        olddepth,newdepth,depthreduce,proportion,<ten flags>

    ``olddepth``/``newdepth`` are copied from the record, while
    ``depthreduce`` and ``proportion`` come from the recomputed depths.

    Parameters:
        sourcefilepath: path of the record file to read.
        resultfilepath: path of the CSV file to (over)write.
    """
    with open(sourcefilepath) as readfile, open(resultfilepath, 'w') as writefile:
        for newline in readfile:
            newline = newline.strip()
            if not newline:
                continue
            fields = newline.split('::')
            if len(fields) != 15:
                continue
            (excelname, oldformula, newformula, tfand, tfor, tflookup,
             tfidlookup, tfchoose, tfmatch, tfifs, tfmaxmin, tfuseless,
             ifredun, olddepth, newdepth) = fields

            p = ForNestedIf.ExcelParser()
            p.parse('=' + oldformula)
            old_depth = p.get_nested_ifs()
            p.parse('=' + newformula)
            new_depth = p.get_nested_ifs()

            depthreduce = old_depth - new_depth
            # float() forces true division under Python 2 (int/int used to
            # truncate the ratio to 0 or 1); a depth of 0 has nothing to
            # reduce, so report 0.0 instead of dividing by zero.
            proportion = float(depthreduce) / old_depth if old_depth else 0.0

            writefile.write(','.join([
                olddepth, newdepth, str(depthreduce), str(proportion),
                tfand, tfor, tflookup, tfidlookup, tfchoose, tfmatch,
                tfifs, tfmaxmin, tfuseless, ifredun,
            ]) + '\n')
def _is_comparison_of(condition, left, right, symbol):
    """True if *condition* is `left symbol right`, either operand optionally parenthesised."""
    for lhs in (left, '(' + left + ')'):
        for rhs in (right, '(' + right + ')'):
            if lhs + symbol + rhs == condition:
                return True
    return False


def return_true_ifis_MAXMIN(formula):
    """Tell whether *formula* contains an IF that is really a MAX or MIN.

    An IF qualifies when it contains exactly one 'IF' and its condition is
    a ``<``, ``>``, ``<=`` or ``>=`` comparison between its own true part
    and false part (in either order, each optionally in parentheses).

    Returns True if such an IF is found among the inner IFs or the formula
    itself, False otherwise or when anything fails to parse.
    """
    p = ForNestedIf.ExcelParser()
    try:
        p.parse('=' + formula)
    except Exception:
        return False
    all_innerif_list = p.get_all_innerif_list()
    all_innerif_list.append(formula)

    for eachone in all_innerif_list:
        if eachone.count('IF') != 1:
            continue
        # Parse the candidate itself: without this the three parts below
        # always belonged to the outer formula parsed above.
        try:
            p.parse('=' + eachone)
        except Exception:
            return False
        threeparts = p.get_threeparts_IF()
        for eachsymbol in ('<', '>', '<=', '>='):
            if (_is_comparison_of(threeparts[0], threeparts[1], threeparts[2], eachsymbol)
                    or _is_comparison_of(threeparts[0], threeparts[2], threeparts[1], eachsymbol)):
                return True
    return False
def deal_with_AND_condition(formula):
    """Split an AND(...) call or an '&'-joined condition into its operands.

    Returns a dict mapping 0, 1, 2... to the text of each operand, rebuilt
    from the parser tokens.  Returns False when the formula cannot be
    parsed, has no tokens, or is neither an AND call (first token 'AND')
    nor contains '&'.
    """
    p = ForNestedIf.ExcelParser()
    try:
        p.parse('='+formula)
    except:
        return False
    stringdic = {}
    indent = 0
    count = 0
    stringdic[count] = ''
    isAndPattern = False  # becomes True for AND(...) or when '&' is present
    total_count = 0
    if p.tokens:
        if not p.tokens.items:
            return False
        if p.tokens.items[0].tvalue == 'AND':
            isAndPattern = True
        elif '&' in formula:
            isAndPattern = True
    if not isAndPattern:
        return False
    for t in p.tokens.items:
        isrange_anothersheet = False
        newvalue = ''
        # A range on another sheet gets its sheet name wrapped in single quotes
        if '!' in t.tvalue and t.tsubtype == 'range':
            newvalue = '\'' + t.tvalue.split('!')[0] + '\'' + '!' + t.tvalue.split('!')[1]
            isrange_anothersheet = True
        # The depth drops before a closing token is examined and rises only
        # after an opening token has been handled (bottom of the loop)
        if (t.tsubtype == p.TOK_SUBTYPE_STOP):
            indent -= 1
        # A ',' directly inside the outer call, or a top-level '&', ends the
        # current operand and starts the next one
        if (indent == 1 and t.tvalue == ',') or (indent == 0 and t.tvalue == '&'):
            # Drop the function name collected at the start of the operand
            if 'AND' in stringdic[count]:
                stringdic[count] = stringdic[count].replace('AND','')
            count += 1
            stringdic[count] = ''
            continue
        else:
            if isrange_anothersheet:
                stringdic[count] += newvalue
            elif t.tsubtype == 'text':
                # Text tokens get their double quotes put back
                stringdic[count] += '\"' + t.tvalue + '\"'
            elif t.tsubtype == 'number' and t.tvalue == '':
                # An empty 'number' token is written out as an empty string literal
                stringdic[count] += '\"\"'
            else:
                # NOTE(review): opening/closing tokens contribute only their
                # tvalue here, with no '(' or ')' added - confirm that nested
                # function calls inside an operand are rebuilt correctly.
                stringdic[count] += t.tvalue
        if (t.tsubtype == p.TOK_SUBTYPE_START):
            indent += 1;
    return stringdic
def redundancy_loop_NEW(formula):
    """Repeatedly strip redundant nested IFs from *formula* until stable.

    Each round gathers candidate sub-formulas (the parallel IFs of the
    formula plus those of its three IF parts), runs
    ``deal_with_redundancy_oneround`` on each, and substitutes any
    simplification back into the formula.

    Returns the (possibly unchanged) formula with spaces removed, or False
    if the formula cannot be parsed.
    """
    formula = formula.replace(' ', '')
    changed = True
    while changed:
        changed = False

        candidates = ForNestedIf.get_para_iflist_second(formula)
        if not candidates:
            candidates = []

        parser = ForNestedIf.ExcelParser()
        try:
            parser.parse('=' + formula)
        except:
            return False
        for part in parser.get_threeparts_IF():
            nested = ForNestedIf.get_para_iflist_second(part)
            if nested:
                for inner in nested:
                    candidates.append(inner)

        if not candidates or len(candidates) == 0:
            return formula

        for candidate in candidates:
            conditions = {}
            get_condition_values_dic(formula, conditions)
            if conditions == '':  # kept from the original; a dict never equals ''
                return False
            outcome = deal_with_redundancy_oneround(candidate, conditions)
            if not outcome:
                continue
            if candidate in formula:
                changed = True
                formula = formula.replace(candidate, outcome[1])
                parser = ForNestedIf.ExcelParser()
    return formula
def refactor_loop_NEW(formula, dic):
    """Apply pattern-based rewrites to *formula* until none applies.

    Each round gathers candidate sub-formulas (the parallel IFs of the
    formula plus those of its three IF parts) and offers each one to
    ``pattern_match_oneround``; successful rewrites are substituted back.
    The inner scan stops early once fewer than two nested IFs remain.

    Parameters:
        formula: formula text without the leading '='; spaces are removed.
        dic: pattern-flag dictionary, updated by ``pattern_match_oneround``.

    Returns:
        The refactored (or unchanged) formula, or False for an empty
        formula.  Parser errors propagate to the caller.
    """
    if not formula:
        return False
    formula = formula.replace(' ', '')

    canrefactor = True
    while canrefactor:
        canrefactor = False
        # Copy so the helper's own list is never extended in place
        para_if_list = list(ForNestedIf.get_para_iflist_second(formula) or [])
        p = ForNestedIf.ExcelParser()
        p.parse('=' + formula)
        for eachpart in p.get_threeparts_IF():
            para_if_list.extend(ForNestedIf.get_para_iflist_second(eachpart) or [])
        if not para_if_list:
            return formula

        for each in para_if_list:
            returnresults = pattern_match_oneround(each, dic)
            if not returnresults or each not in formula:
                continue
            # A "rewrite" identical to its input changes nothing; counting it
            # as progress made the while loop spin forever on such formulas.
            if returnresults == each:
                continue
            canrefactor = True
            formula = formula.replace(each, returnresults)
            p.parse('=' + formula)
            if p.get_nested_ifs() < 2:
                break
    return formula
def predealwith_formula(formula):
    """Normalise a formula by re-serialising its parser tokens.

    Spaces and the '[#ThisRow],' marker are removed, the formula is parsed,
    and the text is rebuilt token by token: opening tokens get '(' appended,
    closing tokens ')', text tokens are re-quoted, and an empty number token
    becomes '""'.

    Returns the rebuilt formula, or False if it cannot be parsed.
    """
    formula = formula.strip().replace(' ', '')
    formula = formula.replace('[#ThisRow],', '')

    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse('=' + formula)
    except:
        return False

    rebuilt = ""
    depth = 0
    if parser.tokens:
        while parser.tokens.moveNext():
            tok = parser.tokens.current()
            opens = tok.tsubtype == parser.TOK_SUBTYPE_START
            closes = tok.tsubtype == parser.TOK_SUBTYPE_STOP

            if closes:
                depth -= 1
                # depth below zero: emit an additional closer, as the original did
                if depth == -1:
                    rebuilt += tok.tvalue + ')'

            if opens:
                rebuilt += tok.tvalue + '('
            elif closes:
                rebuilt += tok.tvalue + ')'
            elif tok.tsubtype == 'text':
                rebuilt += '\"' + tok.tvalue + '\"'
            elif tok.tsubtype == 'number' and tok.tvalue == '':
                rebuilt += '\"\"'
            else:
                rebuilt += tok.tvalue

            if opens:
                depth += 1
    return rebuilt.strip()
def _find_comparison_symbol(condition, left, right):
    """Return the operator if *condition* is `left <op> right` (operands optionally parenthesised), else ''."""
    for symbol in ('<', '>', '<=', '>='):
        for lhs in (left, '(' + left + ')'):
            for rhs in (right, '(' + right + ')'):
                if lhs + symbol + rhs == condition:
                    return symbol
    return ''


def get_simplified_MAXMIN_pattern(formula):
    """Replace an IF that only picks the larger/smaller operand by MAX/MIN.

    For every inner IF (and the formula itself) that contains exactly one
    'IF', the condition is compared with its true and false parts:

        IF(t>f, t, f)  or  IF(t>=f, t, f)  ->  MAX(t,f)
        IF(t<f, t, f)  or  IF(t<=f, t, f)  ->  MIN(t,f)
        IF(f>t, t, f)  or  IF(f>=t, t, f)  ->  MIN(t,f)
        IF(f<t, t, f)  or  IF(f<=t, t, f)  ->  MAX(t,f)

    Returns the formula with the first such IF replaced, or False when no
    IF qualifies, nothing changes, or parsing fails.
    """
    p = ForNestedIf.ExcelParser()
    try:
        # '=' prefix added for consistency with every other parse call
        p.parse('=' + formula)
    except Exception:
        return False
    all_innerif_list = p.get_all_innerif_list()
    all_innerif_list.append(formula)

    for eachone in all_innerif_list:
        if eachone.count('IF') != 1:
            continue
        # Parse the candidate itself: without this the three parts always
        # belonged to the outer formula parsed above.
        try:
            p.parse('=' + eachone)
        except Exception:
            return False
        threeparts = p.get_threeparts_IF()
        condition, true_part, false_part = threeparts[0], threeparts[1], threeparts[2]

        symbol = _find_comparison_symbol(condition, true_part, false_part)
        if symbol:
            function = 'MAX' if symbol in ('>', '>=') else 'MIN'
        else:
            symbol = _find_comparison_symbol(condition, false_part, true_part)
            if not symbol:
                continue
            function = 'MIN' if symbol in ('>', '>=') else 'MAX'

        final = function + '(' + true_part + ',' + false_part + ')'
        simplified = formula.replace(eachone, final)
        # An unchanged formula is not a simplification; reporting it as one
        # would make callers believe progress was made.
        if simplified != formula:
            return simplified
    return False
def get_para_iflist(formula):
    """Return the parallel inner IF parts of *formula*, or False.

    Input: the formula to refactor (without the leading '=').
    Output: the list from the parser's ``get_para_if`` (parts that do not
    contain each other) when at least one part still looks nested, i.e.
    its 'IF' count exceeds each of its 'IFS', 'IFERROR' and 'IFNA' counts
    by more than one.  False when no part qualifies or parsing fails.
    """
    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse('=' + formula)
    except:
        return False

    def looks_nested(text):
        if_total = text.count('IF')
        return (if_total - text.count('IFS') > 1
                and if_total - text.count('IFERROR') > 1
                and if_total - text.count('IFNA') > 1)

    inner_parts = parser.get_para_if()
    # Evaluate every part (no short-circuit), exactly as the original loop did
    verdicts = [looks_nested(part) for part in inner_parts]
    if any(verdicts):
        return inner_parts
    return False
def get_condition_values_dic(formula, dic):
    """Record every IF condition of *formula* and its two branches in *dic*.

    *dic* is filled in place:
        dic['condition_list']  - condition strings in visiting order;
        dic[<condition>]       - true part and false part appended as a
                                 pair for each occurrence of that condition.

    If the formula does not start with an IF token, the parser's
    ``get_onlyIFfunction`` result is analysed instead.  Parts that contain
    'IF' are processed recursively.  Nothing is returned.
    """
    parser = ForNestedIf.ExcelParser()
    try:
        parser.parse('=' + formula)
        if parser.tokens.items[0].tvalue != 'IF':
            parser.parse('=' + parser.get_onlyIFfunction())
        parts = parser.get_threeparts_IF()

        condition = parts[0]
        dic.setdefault('condition_list', []).append(condition)
        true_part = parts[1]
        dic.setdefault(condition, []).append(true_part)
        false_part = parts[2]
        dic.setdefault(condition, []).append(false_part)

        if 'IF' in formula:
            for part in parts:
                if 'IF' in part:
                    parser.parse('=' + part)
                    inner_if = parser.get_onlyIFfunction()
                    get_condition_values_dic(inner_if, dic)
    except:
        # NOTE(review): this only rebinds the local name; the caller's
        # dictionary keeps whatever was collected before the failure.
        dic = {}
def deal_with_zeroreduce(sourcefilepath, resultfilepath):
    """Append rows for records whose recomputed nesting depth did shrink.

    Each non-blank source line must have exactly 17 '::'-separated fields
    (path, sheet, old formula, new formula, ten pattern flags, old depth,
    new depth, formula count); other lines are skipped.  The depths of the
    old and new formulas are recomputed with the parser and, when the depth
    went down, this row is appended to the result file:

        olddepth,newdepth,depthreduce,proportion,num

    A progress counter is printed every 1000 records.

    Parameters:
        sourcefilepath: path of the record file to read.
        resultfilepath: path of the CSV file to append to.
    """
    count = 0
    with open(sourcefilepath) as readfile, open(resultfilepath, 'a') as writefile:
        # Iterating the file stops at end-of-file only; the original
        # readline()/strip() loop stopped at the first blank line.
        for rawline in readfile:
            newline = rawline.strip()
            if not newline:
                continue
            count += 1
            if count % 1000 == 0:
                print(count)

            fields = newline.split('::')
            if len(fields) != 17:
                continue
            oldformula, newformula = fields[2], fields[3]
            olddepth, newdepth, num = fields[14], fields[15], fields[16]

            p = ForNestedIf.ExcelParser()
            p.parse('=' + oldformula)
            old_depth = p.get_nested_ifs()
            p.parse('=' + newformula)
            new_depth = p.get_nested_ifs()

            depthreduce = old_depth - new_depth
            if depthreduce > 0 and old_depth:
                # float() forces true division under Python 2
                proportion = float(depthreduce) / old_depth
                writefile.write(olddepth + ',' + newdepth + ',' + str(depthreduce)
                                + ',' + str(proportion) + ',' + num + '\n')
def get_simplified_MATCH_pattern(formula, threepartlist):
    """Rewrite an `x=k0 / x=k1 / ...` IF chain with numeric results as MATCH.

    Parameters:
        formula: the original formula text (unused, kept for the common
            ``get_simplified_*`` signature).
        threepartlist: ``[conditions, true_parts, false_parts]`` as lists of
            strings; each condition is expected to look like ``lhs=rhs`` and
            the true parts to be numbers forming an arithmetic sequence.

    Returns:
        ``IFERROR(MATCH(lhs,{k0,k1,...},0),default)`` when the true parts
        are 1, 2, 3...; otherwise the MATCH position scaled by the step and
        shifted by the first value.  False when the inputs cannot be turned
        into such a formula (no conditions, fewer than two true parts, no
        false part, a condition without '=', non-numeric true parts, or a
        right-hand side the parser rejects).
    """
    condition_part_list = threepartlist[0]
    true_part_list = threepartlist[1]
    false_part_list = threepartlist[2]

    # The original indexed straight into these lists and raised IndexError.
    if not condition_part_list or len(true_part_list) < 2 or not false_part_list:
        return False

    before_equal_list = []
    after_equal_list = []
    for each in condition_part_list:
        pieces = each.split('=')
        if len(pieces) < 2:
            return False
        before_equal_list.append(pieces[0])
        after_equal_list.append(pieces[1])

    try:
        step = float(true_part_list[1]) - float(true_part_list[0])
    except (TypeError, ValueError):
        return False

    p = ForNestedIf.ExcelParser()
    for after in after_equal_list:
        try:
            p.parse('=' + after)
        except Exception:
            return False

    match_call = ('MATCH(' + before_equal_list[0] + ',{'
                  + ','.join(after_equal_list) + '},0)')
    if step == 1.0 and true_part_list[0] == '1':
        # Results are exactly the 1-based position: MATCH alone is enough
        return 'IFERROR(' + match_call + ',' + false_part_list[-1] + ')'
    return ('IFERROR((' + match_call + '-1)*(' + true_part_list[1] + '-'
            + true_part_list[0] + ')+' + true_part_list[0] + ','
            + false_part_list[-1] + ')')
def get_simplified_USELESS_pattern(formula):
    """Remove an IF whose condition merely compares its own two branches.

    ``IF(a=b, a, b)`` and ``IF(b=a, a, b)`` both always evaluate to the
    false part ``b``: when the condition holds the two branches are equal,
    otherwise the false part is chosen.  The first such IF found among the
    inner IFs (or the formula itself) is replaced by its false part.

    Returns the simplified formula, or False when no such IF exists, the
    replacement leaves the formula unchanged, or parsing fails.
    """
    p = ForNestedIf.ExcelParser()
    try:
        p.parse('=' + formula)
    except Exception:
        return False
    all_innerif_list = p.get_all_innerif_list()
    all_innerif_list.append(formula)

    for eachone in all_innerif_list:
        try:
            p.parse('=' + eachone)
        except Exception:
            return False
        threeparts = p.get_threeparts_IF()
        condition, true_part, false_part = threeparts[0], threeparts[1], threeparts[2]
        if condition not in (false_part + '=' + true_part, true_part + '=' + false_part):
            continue

        replacestring = 'IF(' + condition + ',' + true_part + ',' + false_part + ')'
        # Always keep the false part: the original kept the true part for
        # the `true=false` spelling, which is wrong whenever the two differ.
        simplified = formula.replace(replacestring, false_part)
        # An unchanged formula is not a simplification
        if simplified != formula:
            return simplified
    return False
def deal_with_redundancy_oneround(formula,dic):
    """Run one pass of redundant-IF removal on *formula*.

    *dic* is the structure built by ``get_condition_values_dic``:
    dic['condition_list'] lists the conditions and dic[<condition>] holds
    that condition's true/false parts in pairs.

    Returns ``[original_formula, simplified_formula]`` when the pass changed
    the formula, otherwise False (also when *dic* has no 'condition_list').

    Three kinds of redundancy are handled in turn: the same condition
    tested again inside its own branch, a condition whose logical opposite
    is tested inside its branch, and an AND/'&' condition whose operands
    are all conditions of their own.
    """
    originalfomula = formula
    contain = False
    condition_list = []
    returnlist = []
    p = ForNestedIf.ExcelParser()
    if 'condition_list' not in dic:
        return False
    condition_list = dic['condition_list']
    returnlist.append(formula)
    oppo_list = []
    # Pass 1: a condition that occurs more than once (more than one
    # true/false pair stored under it).
    for key in dic:
        if 'condition_list' in key:
            continue
        if len(dic[key]) > 2:
            ccc = 0
            # NOTE(review): ccc advances by 2 but the bound is len/2, so with
            # four or more pairs the last pairs are never visited - confirm.
            while ccc<(len(dic[key])/2):
                ccc+=2
                # Text of the repeated IF built from a later pair
                redun = 'IF('+key+','+dic[key][ccc]+','+dic[key][ccc+1]+')'
                if ',)' in redun:
                    redun = redun.replace(',)', ')')
                if redun in dic[key][0]:
                    # Repeated inside the first true branch: keep its true part
                    contain = True
                    newstring = dic[key][0].replace(redun,dic[key][ccc])
                    formula=formula.replace(dic[key][0],newstring)
                elif redun in dic[key][1]:
                    # Repeated inside the first false branch: keep its false part
                    contain = True
                    newstring = dic[key][1].replace(redun,dic[key][ccc+1])
                    formula = formula.replace(dic[key][1],newstring)
        # Collect the logical opposite of every plain (non-IF, non-empty) key
        if 'IF' not in key and (key != ''):
            newkey = get_oppo_logicformula(key)
            oppo_list.append(newkey)
    # Pass 2: a condition and its opposite are both tested.
    if len(set(condition_list).intersection(oppo_list)) != 0:
        for i in oppo_list:
            if i in condition_list:
                jone = ''
                jone = get_oppo_logicformula(i)
                if jone not in dic:
                    continue
                redun = 'IF(' + jone + ',' + dic[jone][0] + ',' + dic[jone][1] + ')'
                if ',)' in redun:
                    redun = redun.replace(',)',')')
                if redun in dic[i][0]:
                    # Inside i's true branch the opposite is false: keep its false part
                    newstring = dic[i][0].replace(redun,dic[jone][1])
                    formula = formula.replace(dic[i][0], newstring)
                    contain = True
                elif redun in dic[i][1]:
                    # Inside i's false branch the opposite is true: keep its true part
                    newstring = dic[i][1].replace(redun,dic[jone][0])
                    formula = formula.replace(dic[i][1], newstring)
                    contain = True
    isredun = False
    TFlist = []
    # Pass 3: condition combinations (AND / '&')
    each = ''
    andboolean = True
    for eachcondition_list in condition_list:
        conditionresults = deal_with_AND_condition(eachcondition_list)
        if conditionresults:
            for each in conditionresults:
                if conditionresults[each] not in condition_list:
                    andboolean = False
            # NOTE(review): andboolean is never reset to True, so one failing
            # combination disables the check for all later ones - confirm.
            if andboolean:
                isredun = True
                each = eachcondition_list
                break
    if not isredun:
        # No usable combination: report whatever passes 1 and 2 achieved
        if contain and formula != originalfomula:
            returnlist.append(formula)
        if len(returnlist) == 2:
            return returnlist
        else:
            return False
    # From here on `each` is the combined condition and `conditionresults`
    # its operands.
    for eachconditionresults in conditionresults:
        eachconditionresults = conditionresults[eachconditionresults]
        oppo_eachconditionresults = get_oppo_logicformula(eachconditionresults)
        if eachconditionresults in dic:
            if each in dic[eachconditionresults][0]:
                # Combined condition sits in the operand's true branch
                TFlist.append('T')
            elif each in dic[eachconditionresults][1]:
                # Combined condition sits in the operand's false branch
                TFlist.append('F')
            elif eachconditionresults in dic[each][0]:
                # Operand re-tested inside the combined condition's true branch
                redun = 'IF(' + eachconditionresults + ',' + dic[eachconditionresults][0] + ',' + dic[eachconditionresults][1] + ')'
                newstring = dic[each][0].replace(redun,dic[eachconditionresults][0])
                formula = formula.replace(dic[each][0],newstring)
                if formula != originalfomula:
                    returnlist.append(formula)
                if len(returnlist) == 2:
                    return returnlist
        elif oppo_eachconditionresults != '':
            if oppo_eachconditionresults in dic:
                # Same checks with the operand's opposite: T and F swap roles
                if each in dic[oppo_eachconditionresults][0]:
                    TFlist.append('F')
                elif each in dic[oppo_eachconditionresults][1]:
                    TFlist.append('T')
                elif oppo_eachconditionresults in dic[each][0]:
                    redun = 'IF(' + oppo_eachconditionresults + ',' + dic[oppo_eachconditionresults][0] + ',' + dic[oppo_eachconditionresults][1] + ')'
                    newstring = dic[each][0].replace(redun,dic[oppo_eachconditionresults][0])
                    formula = formula.replace(dic[each][0],newstring)
                    if formula != originalfomula:
                        returnlist.append(formula)
                    if len(returnlist) == 2:
                        return returnlist
            else:
                return False
    # Replace the combined IF by one branch; a single 'F' outweighs any 'T'
    redun = 'IF(' + each + ',' + dic[each][0] + ',' + dic[each][1] + ')'
    if ',)' in redun:
        redun = redun.replace(',)',')')
    if 'F' in TFlist:
        contain = True
        formula = formula.replace(redun,dic[each][1])
        if formula != originalfomula:
            returnlist.append(formula)
    elif 'T' in TFlist:
        contain = True
        formula = formula.replace(redun, dic[each][0])
        if formula != originalfomula:
            returnlist.append(formula)
    if returnlist and len(returnlist) == 2:
        return returnlist
    return False
def redundancy_loop(formula, stopboolean,canrefactorboolean):
    """Recursive redundancy-removal driver.

    Parameters:
        formula: formula text without the leading '='; spaces are removed.
        stopboolean: the body only runs when this is true.
        canrefactorboolean: whether a simplification has already happened.

    Returns the formula when ``canrefactorboolean`` ends up true, otherwise
    False.
    """
    formula = formula.replace(' ','')
    if stopboolean:
        para_if_list =ForNestedIf.get_para_iflist_second(formula)
        if not para_if_list:
            stopboolean = True
            if canrefactorboolean:
                return formula
            else:
                return False
        for each in para_if_list:
            dic = {}
            get_condition_values_dic(formula, dic)
            # dic was created as a dict just above, so this comparison with a
            # string is never true
            if dic == '':
                return False
            # NOTE(review): the whole formula (not `each`) is analysed, yet
            # `each` is what gets replaced below - confirm this is intended.
            returnlist = deal_with_redundancy_oneround(formula, dic)
            if returnlist:
                stopboolean = False
                if each in formula:
                    canrefactorboolean = True
                    formula = formula.replace(each,returnlist[1])
        # Second sweep: the three parts of the (possibly updated) formula
        p = ForNestedIf.ExcelParser()
        p.parse('='+formula)
        threeparts_list = p.get_threeparts_IF()
        for eachthree in threeparts_list:
            dic = {}
            get_condition_values_dic(eachthree, dic)
            if dic == '':
                return False
            returnresultspart = deal_with_redundancy_oneround(eachthree, dic)
            if returnresultspart:
                stopboolean = False
                if eachthree in formula:
                    canrefactorboolean = True
                    formula = formula.replace(eachthree, returnresultspart[1])
        if canrefactorboolean:
            # NOTE(review): the result of the recursive call is discarded
            redundancy_loop(formula, stopboolean,canrefactorboolean)
    if canrefactorboolean:
        return formula
    else:
        return False
def _split_workbook_path(path):
    """Split '<workbook>.xlsx<sheet>.txt' (or .xlsm) into (excelname, sheetname); None if it does not fit."""
    result = None
    # '.xlsx' is checked last so it wins when both markers occur, as before
    for extension in ('xlsm', 'xlsx'):
        marker = '.' + extension
        if marker in path:
            try:
                sheetname = path[path.index(marker) + 5:path.index('.txt')]
            except ValueError:
                return None
            result = (path.split(extension)[0] + extension, sheetname)
    return result


def _simplify_flagged_formula(formula, flags, excelname, sheetname, threepartlist):
    """Run the simplifier selected by the first 'True' flag; return (newformula, label) or None."""
    if flags[1] == 'True':
        return get_simplified_AND_pattern(formula, threepartlist), 'AND'
    if flags[2] == 'True':
        return get_simplified_OR_pattern(formula, threepartlist), 'OR'
    if flags[5] == 'True':
        return get_simplified_CHOOSE_pattern(formula, threepartlist), 'CHOOSE'
    if flags[6] == 'True':
        return get_simplified_MATCH_pattern(formula, threepartlist), 'MATCH'
    if flags[3] == 'True':
        return get_simplified_LOOKUP_pattern(formula, excelname, sheetname, threepartlist), 'LOOKUP'
    if flags[4] == 'True':
        return get_simplified_ID_LOOKUP_pattern(formula, excelname, sheetname, threepartlist), 'LOOKUP'
    if flags[8] == 'True':
        # MAXMIN and USELESS take the formula only
        return get_simplified_MAXMIN_pattern(formula), 'MAXMIN'
    if flags[9] == 'True':
        return get_simplified_USELESS_pattern(formula), 'USELESS'
    if flags[10] == 'True':
        return get_simplified_IFS_pattern(formula, threepartlist), 'IFS'
    return None


def main_for_ALL(numstring, start_count=4178):
    """Refactor the formulas of one 'Total-Matrix-<numstring>.txt' file.

    Every non-blank input line is ``path::formula::flags`` where *flags* is
    a comma-separated list of 'True'/'False' strings.  Lines whose first
    flag is 'True' are skipped.  For the others, the first set flag (in the
    order AND, OR, CHOOSE, MATCH, LOOKUP, ID-LOOKUP, MAXMIN, USELESS, IFS)
    selects the simplifier, and on success this line is appended to
    'Refactored-<numstring>.txt':

        excelname::sheetname::formula::newformula::flags::LABEL

    Parameters:
        numstring: suffix identifying the input and output files.
        start_count: 1-based index of the first non-blank line to process;
            earlier lines are skipped.  Defaults to the previously
            hard-coded resume point.
    """
    p = ForNestedIf.ExcelParser()
    filepath = 'D:\\Users\\v-jizha4\\results\\metrix-results\\Total-Matrix-' + numstring + '.txt'
    write_filename = 'D:\\Users\\v-jizha4\\results\\after-refactor-results\\Refactored-' + numstring + '.txt'
    count = 0
    # One output handle for the whole run; the original reopened the file
    # for every line and closed only the last handle.
    with open(filepath) as readfile, open(write_filename, 'a') as writefile:
        for eachline in readfile:
            i = eachline.strip()
            if i == '':
                continue
            i = i.replace('[#This Row],', '')
            count += 1
            if count < start_count:
                continue

            fields = i.split('::')
            if len(fields) != 3:
                continue
            path, formula, truefalse = fields
            truefalse_list = truefalse.split(',')

            # Resolve names per line so a bad path can never reuse the
            # workbook/sheet of an earlier line.
            names = _split_workbook_path(path)
            if names is None:
                print(path)
                continue
            excelname, sheetname = names

            try:
                p.parse(formula)
            except Exception:
                continue
            if count % 1000 == 0:
                print('Matrix-' + numstring + ': ' + str(count))
            if truefalse_list[0] == 'True':
                continue

            threepartlist = [[], [], []]
            p.get_list_threeparts(formula, threepartlist)
            try:
                outcome = _simplify_flagged_formula(
                    formula, truefalse_list, excelname, sheetname, threepartlist)
                if not outcome or not outcome[0]:
                    continue
                newformula, label = outcome
                record = (excelname + '::' + sheetname + '::' + formula + '::'
                          + newformula + '::' + truefalse + '::' + label + '\n')
            except Exception:
                # Best effort, as before: a formula that cannot be simplified
                # is skipped.
                continue
            writefile.write(record)
def generate_classifyequal():
    """Split the formulas of CONEQUAL.txt into CHOOSE / inner-AND / other files.

    Input lines containing the workbook path prefix are file names; all
    other lines are formulas.  The i-th formula is paired with the i-th
    file name, classified with ``classify_equal_formulas``, and written
    (file name line, then formula line) to:

        CHOOSE.txt        for 'ischoose'
        innerand_new.txt  for 'isand'
        newform_new.txt   for anything else

    Progress is printed for the first two categories.  An IndexError is
    raised if there are more formulas than file names.
    """
    base_dir = 'D:\\Users\\v-jizha4\\ExcelExp\\expAnalyResults\\totalsplits\\'
    with open(base_dir + 'CONEQUAL.txt') as readfile:
        lines = [eachline.strip() for eachline in readfile]
    filenamelist = [eachline for eachline in lines if 'D:\\Users' in eachline]
    analyzelist = [eachline for eachline in lines if 'D:\\Users' not in eachline]

    choose_count = 0
    innerand_count = 0
    # `with` closes all three outputs even when classification raises
    with open(base_dir + 'CHOOSE.txt', 'w') as choose_write, \
            open(base_dir + 'innerand_new.txt', 'w') as andwrite, \
            open(base_dir + 'newform_new.txt', 'w') as otherwrite:
        for count, i in enumerate(analyzelist, 1):
            filename = filenamelist[count - 1]
            result = classify_equal_formulas(i)
            if result == 'ischoose':
                choose_count += 1
                print('-----------------------')
                print(filename)
                print(i)
                # two spaces reproduce the Python 2 `print "label: ", n` output
                print('CHOOSE count:  %s' % choose_count)
                print('total count:  %s' % count)
                target = choose_write
            elif result == 'isand':
                innerand_count += 1
                print('-----------------------')
                print(filename)
                print(i)
                print('Inner and count:  %s' % innerand_count)
                print('total count:  %s' % count)
                target = andwrite
            else:
                target = otherwrite
            target.write(filename + '\n')
            target.write(i + '\n')
def return_list_ifis_equal(formula, threepartlist):
    """Decide which lookup-style rewrites fit an equality-based IF chain.

    Returns False when a condition is not a plain '=' comparison, when a
    right-hand side or true part cannot be parsed, or when the left-hand
    sides are not all identical.  Otherwise returns a dict with the boolean
    keys 'choose', 'match', 'lookup' and 'idlookup'; 'idlookup' is set as
    soon as one condition passes the '=' check, the others depend on the
    token subtypes of the right-hand sides and of the true parts.
    """
    verdict = dict.fromkeys(('choose', 'match', 'lookup', 'idlookup'), False)
    parser = ForNestedIf.ExcelParser()
    conditions = threepartlist[0]
    true_parts = threepartlist[1]
    threepartlist[2]  # false parts: looked up as in the original but unused

    left_sides = []
    right_sides = []
    right_kinds = []
    for condition in conditions:
        if '=' not in condition or '<=' in condition or '>=' in condition:
            return False
        verdict['idlookup'] = True
        pieces = condition.split('=')
        left, right = pieces[0], pieces[1]
        try:
            parser.parse('=' + right)
        except:
            return False
        right_kinds.append(parser.tokens.items[0].tsubtype)
        right_sides.append(right)
        left_sides.append(left)

    # every comparison must test the same left-hand side
    if len(set(left_sides)) != 1:
        return False

    true_kinds = []
    for value in true_parts:
        try:
            parser.parse('=' + value)
            true_kinds.append(parser.tokens.items[0].tsubtype)
        except:
            return False

    # mixed kinds on either side: only the indirect lookup remains possible
    if len(set(true_kinds)) != 1 or len(set(right_kinds)) != 1:
        return verdict

    right_kind = right_kinds[0]
    true_kind = true_kinds[0]

    def true_parts_are_arithmetic_numbers():
        return true_kind == 'number' and furtherAnalysis.is_arithmetic(true_parts)

    if right_kind == 'range':
        if true_parts_are_arithmetic_numbers():
            verdict['match'] = True
        elif true_kind == 'range':
            verdict['lookup'] = True
    elif right_kind == 'number' and furtherAnalysis.is_arithmetic(right_sides):
        if true_parts_are_arithmetic_numbers():
            verdict['match'] = True
        elif true_kind == 'range' or true_kind == 'text':
            verdict['choose'] = True
    elif right_kind == 'text':
        if true_parts_are_arithmetic_numbers():
            verdict['match'] = True
    return verdict
def whole_process_loop(dic, filepath, total_or_unique):
    """Refactor every formula of an input file and sort the outcomes.

    Reads ``<filepath>nestif-<total_or_unique>-original.txt`` whose lines
    are ``workbook::formula`` or ``workbook::formula::count``, and writes:

        nestif-<...>-refactored.txt   formulas whose nesting depth shrank
                                      (also those whose result could not be
                                      re-parsed, without depth columns);
        nestif-zeroreduce-<...>.txt   formulas rewritten at equal depth;
        nestif-<...>-fail.txt         formulas left unchanged.

    Lines with a wrong field count, with #REF / #DIV/0 / #VALUE errors, or
    that cannot be normalised are reported on stdout and skipped.  The
    number of rewritten formulas is printed at the end.

    Parameters:
        dic: ignored; a fresh pattern-flag dictionary is built per formula
            (parameter kept for existing callers).
        filepath: directory prefix (including the trailing separator).
        total_or_unique: infix used in all four file names.
    """
    flag_names = ('and', 'or', 'lookup', 'idlookup', 'choose', 'match',
                  'ifs', 'maxmin', 'useless', 'redun')
    original_formu_path = filepath + 'nestif-' + total_or_unique + '-original.txt'
    refactor_result_path = filepath + 'nestif-' + total_or_unique + '-refactored.txt'
    failpath = filepath + 'nestif-' + total_or_unique + '-fail.txt'
    nodepthreducefile = filepath + 'nestif-zeroreduce-' + total_or_unique + '.txt'

    count = 0
    # `with` closes all four files even if a formula makes the parser raise;
    # the original never closed the input at all.
    with open(original_formu_path) as readfile, \
            open(refactor_result_path, 'w') as writesuccess, \
            open(failpath, 'w') as writefail, \
            open(nodepthreducefile, 'w') as zeroreduce:
        for thisline in readfile:
            fields = thisline.strip().split('::')
            if len(fields) not in (2, 3):
                print('1============================' + thisline)
                continue
            excelfilename, formula = fields[0], fields[1]
            if '#REF' in formula or '#DIV/0' in formula or '#VALUE' in formula:
                print('2============================')
                continue
            formula = predealwith_formula(formula)
            if not formula:
                print('3============================' + thisline)
                continue
            originalformu = formula
            print(formula)

            dic = dict.fromkeys(flag_names, False)
            returnlist = dealWithRudunPatterns.redundancy_loop_NEW(formula)
            if returnlist != formula:
                dic['redun'] = True
                formula = returnlist
            try:
                newformula = refactor_loop_NEW(formula, dic)
            except Exception:
                # Best effort, as before: a formula that breaks the
                # refactoring is dropped without a record.
                continue

            if not newformula or newformula == originalformu:
                writefail.write(excelfilename + '::' + originalformu + '\n')
                continue

            if count % 100 == 0:
                print(count)
            count += 1
            truefalsestring = '::'.join(str(dic[name]) for name in flag_names)
            record = (excelfilename + '::' + originalformu + '::' + newformula
                      + '::' + truefalsestring)

            p = ForNestedIf.ExcelParser()
            p.parse('=' + originalformu)
            olddepth = p.get_nested_ifs()
            try:
                p.parse('=' + newformula)
            except Exception:
                # Result cannot be re-parsed: keep it, without depth columns
                writesuccess.write(record + '\n')
                continue
            newdepth = p.get_nested_ifs()

            depths = '::' + str(olddepth) + '::' + str(newdepth) + '\n'
            if olddepth - newdepth == 0:
                zeroreduce.write(record + depths)
            else:
                writesuccess.write(record + depths)
    print(count)
def pattern_match_oneround(formula, dic):
    """Try each refactoring pattern once on an IF formula.

    Input: an IF formula (without the leading '=') and the pattern-flag
    dictionary.
    Output: the rewritten formula produced by the first pattern that
    applies - the matching flag is recorded through ``modify_truefalsedic``
    - or False when no pattern applies or the formula cannot be parsed.

    Patterns are tried in this order: AND, OR, other-OR, then (only when
    the conditions form an equality family and the formula contains more
    than three 'IF') CHOOSE, MATCH, LOOKUP, ID-LOOKUP, and finally MAX/MIN,
    USELESS and IFS.
    """
    p = ForNestedIf.ExcelParser()
    try:
        p.parse('=' + formula)
    except Exception:
        return False
    threepartlist = [[], [], []]
    p.get_list_threeparts(formula, threepartlist)
    if threepartlist[1] == '' and threepartlist[2] == '':
        formula = threepartlist[0]
        try:
            p.parse('=' + formula)
        except Exception:
            return False

    for flag, simplify in (
            ('and', getReducedFormulas.get_simplified_AND_pattern),
            ('or', getReducedFormulas.get_simplified_OR_pattern),
            ('or', return_true_ifis_otherOR)):
        newformula = simplify(formula, threepartlist)
        if newformula:
            modify_truefalsedic(dic, flag)
            return newformula

    # Classify once; the original ran this analysis twice per formula.
    dic_small = return_list_ifis_equal(formula, threepartlist)
    if dic_small and formula.count('IF') > 3:
        for flag, simplify in (
                ('choose', getReducedFormulas.get_simplified_CHOOSE_pattern),
                ('match', getReducedFormulas.get_simplified_MATCH_pattern),
                ('lookup', getReducedFormulas.get_simplified_LOOKUP_pattern),
                ('idlookup', getReducedFormulas.get_simplified_ID_LOOKUP_pattern)):
            if dic_small[flag] == True:
                newformula = simplify(formula, threepartlist)
                if newformula:
                    modify_truefalsedic(dic, flag)
                    return newformula

    newformula = getReducedFormulas.get_simplified_MAXMIN_pattern(formula)
    if newformula:
        modify_truefalsedic(dic, 'maxmin')
        return newformula

    newformula = getReducedFormulas.get_simplified_USELESS_pattern(formula)
    if newformula:
        modify_truefalsedic(dic, 'useless')
        return newformula

    newformula = getReducedFormulas.get_simplified_IFS_pattern(formula, threepartlist)
    if newformula:
        modify_truefalsedic(dic, 'ifs')
        return newformula
    return False