Python loadWordPairDict Examples

Programming Language: Python

Namespace/Package Name: util

Method/Function: loadWordPairDict

Examples at hotexamples.com: 4

Python loadWordPairDict - 4 examples found. These are the top-rated real-world Python examples of util.loadWordPairDict, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: makeFeatFile.py Project: yuchenz/haceptProject

def makeFeatFile(chF, enF, waF, outF, numProc):
    """Extract examples for a parallel corpus and write them to ``outF``.

    Args:
        chF, enF, waF: paths of UTF-8 text files. Each line is split on
            whitespace into one token list; the three files must have the
            same number of lines (presumably Chinese sentences, English
            sentences and word-alignment links -- confirm against callers).
        outF: destination path. The output is first written to
            ``/dev/shm/tmp`` and then moved to ``outF`` with ``mv``.
        numProc: number of worker processes. Values <= 1 run ``extract``
            in the current process.

    Raises:
        AssertionError: if the three input files differ in line count.
    """
    # Read every input inside a ``with`` block so the handle is closed as
    # soon as the lines have been tokenized.
    with codecs.open(chF, 'r', 'utf-8') as chIn:
        chSentL = [line.split() for line in chIn.readlines()]
    with codecs.open(enF, 'r', 'utf-8') as enIn:
        enSentL = [line.split() for line in enIn.readlines()]
    with codecs.open(waF, 'r', 'utf-8') as waIn:
        waL = [line.split() for line in waIn.readlines()]

    assert len(chSentL) == len(enSentL) == len(waL), \
      "len chSentL == %d, len enSentL == %d, len waL == %d" % (len(chSentL), len(enSentL), len(waL))

    fwD = loadFuncWordDict("ch_funcWordL.txt")
    wpD = loadWordPairDict("cedict_hacept_train.dict")

    s = time.clock()
    if numProc > 1:
        total = len(chSentL)
        # Ceiling division by numProc gives every worker a near-equal slice.
        # Dividing by (numProc - 1) left the last worker with only the
        # remainder (nothing at all when numProc == 2).
        # NOTE(review): assumes extract() handles sentences independently,
        # so moving the chunk boundaries does not change the result.
        base = (total + numProc - 1) // numProc
        pool = mp.Pool(processes=numProc)
        try:
            tmp = []
            for i in xrange(numProc):
                start = base * i
                end = start + base  # slicing clamps this to len(chSentL)
                tmp.append(
                    pool.apply_async(extract,
                                     args=(chSentL[start:end],
                                           enSentL[start:end],
                                           waL[start:end], start, fwD, wpD)))

            # Collect in submission order so the output keeps corpus order.
            expList = []
            for t in tmp:
                expList.extend(t.get())
        finally:
            # Always release the worker processes, even if a task failed.
            pool.close()
            pool.join()
    else:
        expList = extract(chSentL, enSentL, waL, 0, fwD, wpD)

    print >> sys.stderr, "\nextraction time: %f" % (time.clock() - s)

    s = time.clock()
    with codecs.open("/dev/shm/tmp", 'w', 'utf-8') as outf:
        for exp in expList:
            # __str__() is called directly (not str(exp)) so that a unicode
            # return value is handed to the UTF-8 writer unchanged.
            outf.write(exp.__str__())
    print >> sys.stderr, "outputing time: %f" % (time.clock() - s)
    # Pass an argument list instead of a shell string: outF reaches mv as a
    # single argument, so spaces or shell metacharacters in the path are
    # safe. Shell expansions (e.g. "~") are therefore not performed on outF.
    subprocess.call(["mv", "/dev/shm/tmp", outF])

Example #2

Show file

def make(chF, enF, gwaF, waF, outF):
    chSentL = [
        line.split() for line in codecs.open(chF, 'r', 'utf-8').readlines()
    ]
    enSentL = [
        line.split() for line in codecs.open(enF, 'r', 'utf-8').readlines()
    ]
    if gwaF == "None":
        gwaL = [[] for i in xrange(len(chSentL))]
    else:
        gwaL = [line.split() for line in open(gwaF).readlines()]
    waL = [line.split() for line in open(waF).readlines()]

    print "len of chSentL, enSentL, gwaL, waL: ", len(chSentL), len(
        enSentL), len(gwaL), len(waL)

    fwD = loadFuncWordDict("ch_funcWordL.txt")
    wpD = loadWordPairDict("cedict_hacept_train.dict")
    #wpD = loadWordPairDict("hacept_train.dict")

    expList = []
    for k, chSent in enumerate(chSentL):
        if k % 100 == 0: print k,
        enSent = enSentL[k]
        waSent = waL[k]
        gwaSent = gwaL[k]

        for wa in waSent:
            ID = 'ID' + str(k) + '--' + wa
            label = 'False'
            if wa in gwaSent:
                label = 'True'
            exp = Example(ID, label)
            i, j = int(wa.split('-')[0]), int(wa.split('-')[1])
            exp.featList = extractFeat(i, j, chSent, enSent, wpD, fwD)
            expList.append(exp)

    outf = codecs.open(outF, 'w', 'utf-8')
    for exp in expList:
        outf.write(exp.__str__())
    outf.close()

Example #3

Show file

File: makeFeatFile.py Project: yuchenz/haceptProject

def makeFeatFile(chF, enF, waF, outF, numProc):
	"""Run feature extraction over a parallel corpus and save the result.

	Args:
		chF, enF, waF: paths of UTF-8 text files read line by line, each
			line being split on whitespace. All three must contain the same
			number of lines.
		outF: final output path; data is staged in ``/dev/shm/tmp`` and then
			moved here with ``mv``.
		numProc: worker process count; ``extract`` runs in-process when this
			is 1 or less.

	Raises:
		AssertionError: if the three inputs have different line counts.
	"""
	def readTokenized(path):
		# The ``with`` block closes the handle once the lines are tokenized.
		with codecs.open(path, 'r', 'utf-8') as inf:
			return [line.split() for line in inf.readlines()]

	chSentL = readTokenized(chF)
	enSentL = readTokenized(enF)
	waL = readTokenized(waF)

	assert len(chSentL) == len(enSentL) == len(waL), \
			"len chSentL == %d, len enSentL == %d, len waL == %d" % (len(chSentL), len(enSentL), len(waL))

	fwD = loadFuncWordDict("ch_funcWordL.txt")
	wpD = loadWordPairDict("cedict_hacept_train.dict")

	s = time.clock()
	if numProc > 1:
		# Ceiling division by numProc balances the slices across all workers.
		# The previous divisor (numProc - 1) gave the last worker only the
		# remainder, i.e. an empty slice when numProc == 2.
		# NOTE(review): relies on extract() treating sentences independently,
		# so different chunk boundaries give the same combined result.
		base = (len(chSentL) + numProc - 1) // numProc
		# Slice ends may run past the list; slicing clamps them.
		bounds = [(base * i, base * (i + 1)) for i in xrange(numProc)]

		pool = mp.Pool(processes = numProc)
		try:
			tmp = [
				pool.apply_async(extract, args=(chSentL[start:end], enSentL[start:end], waL[start:end], start, fwD, wpD))
				for start, end in bounds
			]

			# Results are gathered in submission order to keep corpus order.
			expList = []
			for t in tmp:
				expList.extend(t.get())
		finally:
			# Shut the workers down whether or not a task raised.
			pool.close()
			pool.join()
	else:
		expList = extract(chSentL, enSentL, waL, 0, fwD, wpD)

	print >> sys.stderr, "\nextraction time: %f" % (time.clock() - s)

	s = time.clock()
	with codecs.open("/dev/shm/tmp", 'w', 'utf-8') as outf:
		for exp in expList:
			# Direct __str__() call (not str(exp)) so a unicode return value
			# reaches the UTF-8 writer without an implicit ASCII coercion.
			outf.write(exp.__str__())
	print >> sys.stderr, "outputing time: %f" % (time.clock() - s)
	# An argument list avoids shell parsing of outF, so paths containing
	# spaces or metacharacters are moved correctly. Shell expansions such as
	# "~" are consequently not applied to outF.
	subprocess.call(["mv", "/dev/shm/tmp", outF])

Example #4

Show file

File: makeFeatFile2.py Project: yuchenz/haceptProject

def make(chF, enF, gwaF, waF, outF):
	chSentL = [line.split() for line in codecs.open(chF, 'r', 'utf-8').readlines()]
	enSentL = [line.split() for line in codecs.open(enF, 'r', 'utf-8').readlines()]
	if gwaF == "None":
		gwaL = [[] for i in xrange(len(chSentL))]
	else:
		gwaL = [line.split() for line in open(gwaF).readlines()]
	waL = [line.split() for line in open(waF).readlines()]

	print "len of chSentL, enSentL, gwaL, waL: ", len(chSentL), len(enSentL), len(gwaL), len(waL)

	fwD = loadFuncWordDict("ch_funcWordL.txt")
	wpD = loadWordPairDict("cedict_hacept_train.dict")
	#wpD = loadWordPairDict("hacept_train.dict")

	expList = []
	for k, chSent in enumerate(chSentL):
		if k % 100 == 0: print k,
		enSent = enSentL[k]
		waSent = waL[k]
		gwaSent = gwaL[k]

		for wa in waSent:
			ID = 'ID' + str(k) + '--' + wa
			label = 'False'
			if wa in gwaSent:
				label = 'True'
			exp = Example(ID, label)
			i, j = int(wa.split('-')[0]), int(wa.split('-')[1])
			exp.featList = extractFeat(i, j, chSent, enSent, wpD, fwD)
			expList.append(exp)
	
	outf = codecs.open(outF, 'w', 'utf-8')
	for exp in expList:
		outf.write(exp.__str__())
	outf.close()