def _has_colon(value):
    """Return True when *value* (byte string or text) contains a colon."""
    # The needle must have the same string type as the haystack so the
    # membership test works for both byte strings and text.
    needle = b":" if isinstance(value, bytes) else u":"
    return needle in value


def _to_text(value):
    """Return *value* as text, decoding byte strings as UTF-8."""
    # ``bytes`` is an alias of ``str`` on Python 2, so this covers the plain
    # byte strings the script has always decoded before tokenising.
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


def factor(str1, str2):
    """Pair up the words of two phrases position by position.

    Both phrases are split into runs of Unicode word characters. The word
    'of' is dropped from the first phrase only. When the two word lists end
    up with the same length, the words are paired first-with-first,
    second-with-second, and so on.

    Args:
        str1: First phrase, as text or a UTF-8 encoded byte string.
        str2: Second phrase, as text or a UTF-8 encoded byte string.

    Returns:
        A list of ``(word_from_str1, word_from_str2)`` tuples, or an empty
        list when either phrase contains a colon or the word counts differ.

    Raises:
        UnicodeDecodeError: If a byte-string argument is not valid UTF-8.
    """
    # Phrases containing a colon are rejected before any decoding happens.
    if _has_colon(str1) or _has_colon(str2):
        return []

    # A list comprehension (not filter()) so len() works on every Python.
    words1 = [word for word in re.findall(r'\w+', _to_text(str1), re.U)
              if word != 'of']
    words2 = re.findall(r'\w+', _to_text(str2), re.U)

    if len(words1) != len(words2):
        return []
    # Materialise the pairs so callers always receive a real list.
    return list(zip(words1, words2))


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("$bla.py input_csv")
        # Without this exit the script fell through to sys.argv[1] and died
        # with an IndexError right after printing the usage line.
        sys.exit(1)

    fName = os.path.abspath(sys.argv[1])
    srcList = csvConverter.csv2dicts(fName)

    # Later rows overwrite earlier ones when the same first-phrase word
    # is deduced more than once.
    resDict = {}
    for item in srcList:
        for source_word, target_word in factor(item["Arabic"], item["English"]):
            resDict[source_word] = target_word

    resDicts = csvConverter.buildListOfDictionaries(resDict, "key")
    csvConverter.dicts2csv(resDicts, fName + ".deduced.csv")
# NOTE(review): this line is whitespace-collapsed Python 2 source; the original
# line breaks and indentation are lost. It holds three units, in order:
#
# 1. The tail of a function whose header is outside this view. When i > 1 and
#    isTag(lines[i-1], "DET") and isTag(lines[i-2], "NN") all hold, it copies
#    lines[i-2]["NER"] into curLine["NER"]; the function then returns jText.
#    isTag is defined elsewhere -- presumably it tests an entry's tag; confirm.
#
# 2. tagFile(f, dictionary): loads f with tools.loadJson and returns -1 when
#    the loader returns -1. Otherwise it passes the loaded value and
#    `dictionary` to tag(), and writes the result with tools.dumpJson to the
#    path f + ".t". No value is returned on that path.
#
# 3. The script entry point. It prints a usage message when fewer than three
#    argv entries are present. The dictionary is built from the CSV named by
#    argv[1] via csvConverter.csv2dicts and csvConverter.buildDictionary(...,
#    "word"). If argv[2] is a file it is tagged directly; if it is a directory,
#    every path yielded by tools.files(f, ".*\.json$") is tagged, with a
#    "finished <i> files" progress line when i % 100 == 0.
#    NOTE(review): no exit follows the usage message, so sys.argv[2] is still
#    read afterwards and would raise IndexError -- confirm and add an exit.
#    NOTE(review): presumably tagFile(fName, dictionary) and i+=1 run on every
#    loop iteration rather than only under the i%100 check; the collapsed text
#    cannot show this -- verify against the original file.
if i > 1: if isTag(lines[i-1],"DET"): if isTag(lines[i-2], "NN"): curLine["NER"] = lines[i-2]["NER"] return jText def tagFile(f, dictionary): jText = tools.loadJson(f) if jText == -1: return -1 jNewText = tag(jText, dictionary) tools.dumpJson(jNewText, f+".t") if __name__ == "__main__": if len(sys.argv) < 3: print "usage: tag.py dictionary.csv file\nfile expected in json format" f = os.path.abspath(sys.argv[2]) dicName = os.path.abspath(sys.argv[1]) listOfDicts= csvConverter.csv2dicts(dicName) dictionary = csvConverter.buildDictionary(listOfDicts, "word") if os.path.isfile(f): tagFile(f, dictionary) elif os.path.isdir(f): i = 0 for fName in tools.files(f, ".*\.json$"): if i%100 == 0: print "finished", i, "files" tagFile(fName,dictionary) i+=1