コード例 #1
0
ファイル: heuristics.py プロジェクト: aboSamoor/lydia
def factor(str1, str2):
    """Pair up the words of two phrases position by position.

    Each argument is split into runs of word characters (Unicode aware).
    Byte strings are decoded as UTF-8 before splitting.  The word 'of' is
    dropped from the first phrase only.

    Args:
        str1: first phrase, as text or UTF-8 encoded bytes.
        str2: second phrase, as text or UTF-8 encoded bytes.

    Returns:
        A list of (word_from_str1, word_from_str2) tuples when both phrases
        yield the same number of words, otherwise an empty list.  An empty
        list is also returned when either phrase contains a colon.

    Raises:
        UnicodeDecodeError: if a byte-string argument is not valid UTF-8.
    """
    # Reject phrases containing a colon before any decoding happens.  The
    # needle must match the haystack type: text-in-bytes membership raises
    # TypeError on Python 3.
    for value in (str1, str2):
        colon = b":" if isinstance(value, bytes) else u":"
        if colon in value:
            return []

    # isinstance (rather than an exact type comparison) also covers
    # subclasses; ``bytes`` is an alias of ``str`` on Python 2.
    if isinstance(str1, bytes):
        str1 = str1.decode('utf-8')
    if isinstance(str2, bytes):
        str2 = str2.decode('utf-8')

    # NOTE(review): only str1 is filtered for 'of', although the visible
    # caller passes the Arabic text as str1 -- confirm this is intended.
    w1 = [word for word in re.findall(r'\w+', str1, re.U) if word != 'of']
    w2 = re.findall(r'\w+', str2, re.U)

    if len(w1) != len(w2):
        return []
    # Materialize the pairs so a real list is returned on every Python version.
    return list(zip(w1, w2))
    


if __name__ == "__main__":
    # Script entry point: read a CSV of phrase pairs, derive word-level pairs
    # with factor(), and write them to "<input>.deduced.csv".
    if len(sys.argv) < 2:
        print("$bla.py input_csv")
        # Stop here: without the argument, sys.argv[1] below would raise
        # IndexError right after the usage message.
        sys.exit(1)
    fName = os.path.abspath(sys.argv[1])
    # Each item is indexed by the "Arabic" and "English" keys below.
    srcList = csvConverter.csv2dicts(fName)
    resDict = {}
    for item in srcList:
        for pair in factor(item["Arabic"], item["English"]):
            # A later pair with the same first word overwrites the earlier one.
            resDict[pair[0]] = pair[1]
    resDicts = csvConverter.buildListOfDictionaries(resDict, "key")
    csvConverter.dicts2csv(resDicts, fName + ".deduced.csv")

コード例 #2
0
ファイル: tag.py プロジェクト: aboSamoor/lydia
            if i > 1:
                if isTag(lines[i-1],"DET"):
                    if isTag(lines[i-2], "NN"):
                        curLine["NER"] = lines[i-2]["NER"]
    return jText


def tagFile(f, dictionary):
    """Tag the JSON document stored at path ``f`` and save the result.

    The document is read with ``tools.loadJson``, passed through ``tag``
    together with ``dictionary``, and written with ``tools.dumpJson`` to
    the path ``f + ".t"``.

    Returns:
        -1 when ``tools.loadJson`` returns -1 (presumably its failure
        marker -- confirm against ``tools``); otherwise None.
    """
    document = tools.loadJson(f)
    load_failed = document == -1
    if load_failed:
        return -1

    tagged = tag(document, dictionary)
    tools.dumpJson(tagged, f + ".t")

if __name__ == "__main__":
    # Script entry point: tag a single JSON file, or every "*.json" file
    # that tools.files() yields for a directory, using the dictionary built
    # from a CSV file.
    if len(sys.argv) < 3:
        print("usage: tag.py dictionary.csv file\nfile expected in json format")
        # Stop here: without both arguments, sys.argv[2] below would raise
        # IndexError right after the usage message.
        sys.exit(1)
    f = os.path.abspath(sys.argv[2])
    dicName = os.path.abspath(sys.argv[1])
    listOfDicts = csvConverter.csv2dicts(dicName)
    dictionary = csvConverter.buildDictionary(listOfDicts, "word")
    if os.path.isfile(f):
        tagFile(f, dictionary)
    elif os.path.isdir(f):
        # Raw string keeps the same pattern text without relying on an
        # unrecognized "\." escape in a normal string literal.
        for i, fName in enumerate(tools.files(f, r".*\.json$")):
            if i % 100 == 0:
                # Progress line, printed before processing file number i.
                print("finished %d files" % i)
            tagFile(fName, dictionary)