Ejemplo n.º 1
0
def main(args):
    for sentence in sentenceIterator(fileinput.input()):
        rels = [set() for t in sentence]
        for token in sentence:
            if token[HEAD] != "0":
                head = int(token[HEAD]) - 1
                rels[head].add(token[DEPREL])

        for i, token in enumerate(sentence):
            features = []

            # window of words
            features.extend(makeWindow(sentence, i, 2, 2, itemgetter(FORM)))

            features.append(token[FEATS])

            # window of pos tags
            features.extend(makeWindow(sentence, i, 2, 2, itemgetter(POSTAG)))

            # pos conjunctions
            features.append("^".join(
                makeWindow(sentence, i, 1, 0, itemgetter(POSTAG))))
            features.append("^".join(
                makeWindow(sentence, i, 0, 1, itemgetter(POSTAG))))

            features.append("^".join(
                makeWindow(sentence, i, 2, 0, itemgetter(POSTAG))))
            features.append("^".join(
                makeWindow(sentence, i, 0, 2, itemgetter(POSTAG))))

            ## distance to comma
            #left = map(itemgetter(FORM), sentence[:i])
            #right = map(itemgetter(FORM), sentence[i+1:])
            #left.reverse()
            #if "," in left:
            #	features.append(str(left.index(",")))
            #else:
            #	features.append("-")

            #if "," in right:
            #	features.append(str(right.index(",")))
            #else:
            #	features.append("-")

            ## bag-of-POS
            #left = map(itemgetter(CPOSTAG), sentence[:i])
            #left.reverse()
            #right = map(itemgetter(CPOSTAG), sentence[i+1:])
            #for tag in ["CC", "CD", "DT", "EX", "FW", "IN", "JJ", "MD",
            #            "NN", "PD", "PO", "PR", "RB", "RP", "SY", "TO",
            #            "UH", "VB", "WD", "WP", "WR"]:
            #	features.append(str(left.index(tag)) if tag in left else "-")
            #	features.append(str(right.index(tag)) if tag in right else "-")

            print " ".join(features), "|".join(sorted(rels[i])) or "__"

        print
Ejemplo n.º 2
0
def main_(options, args):
    """Parse the input sentences with the CKY parser and write them out.

    Parameters:
        options -- object providing ``dep`` (required, path of the pair
            instances), ``out`` (required, output path), ``dir`` and
            ``mod`` (optional instance paths), ``non_projective`` and
            whatever ``csiparse.formulateWCSP`` reads from it.
        args -- input file names handed to ``fileinput.input``.

    For every sentence the constraints returned by
    ``csiparse.formulateWCSP`` are added to a fresh ``cky.CKYParser``, the
    chart is parsed, and each token is written to ``options.out`` as one
    line of space-joined fields.

    The output file is opened once, when the first sentence arrives, so
    later sentences no longer truncate earlier ones; consecutive sentences
    are separated by a blank line.  All opened files are closed on exit,
    including when an exception is raised.

    Raises AssertionError when ``options.dep`` or ``options.out`` is falsy.
    """
    assert options.dep
    assert options.out

    inputStreams = []

    def openInstances(path):
        # Remember the stream so the finally clause can close it.
        stream = open(path)
        inputStreams.append(stream)
        return csiparse.instanceIterator(stream)

    outfile = None
    try:
        pairsIterator = openInstances(options.dep)
        dirIterator = openInstances(options.dir) if options.dir else None
        relsIterator = openInstances(options.mod) if options.mod else None

        for sentence in sentenceIterator(fileinput.input(args)):
            if outfile is None:
                # Opened lazily: with no input sentences the output file is
                # left untouched, exactly as before.
                outfile = open(options.out, "w")
            else:
                # Blank line between sentences, as the sibling scripts do.
                outfile.write("\n")

            _domains, constraints = csiparse.formulateWCSP(sentence,
                                                           dirIterator,
                                                           relsIterator,
                                                           pairsIterator,
                                                           options)

            parser = cky.CKYParser(len(sentence))
            for constraint in constraints:
                parser.addConstraint(constraint)

            chart = parser.parse()

            # Tokens too short to be indexed at DEPREL get two None
            # placeholders appended.
            for token in sentence:
                if len(token) <= DEPREL:
                    token.extend([None, None])

            # NOTE(review): presumably writes the parse stored in the chart
            # back into the tokens -- confirm against rightComplete.
            # The original passed len(sentence) - 1 + 1, i.e. len(sentence).
            rightComplete(chart, 0, len(sentence), sentence)

            if options.non_projective:
                approxNonProjective(sentence, parser)

            for token in sentence:
                outfile.write(" ".join(map(str, token)))
                outfile.write("\n")
    finally:
        if outfile is not None:
            outfile.close()
        for stream in inputStreams:
            stream.close()
Ejemplo n.º 3
0
def main(args):
	for sentence in sentenceIterator(fileinput.input()):
		rels = [set() for t in sentence]
		for token in sentence:
			if token[HEAD] != "0":
				head = int(token[HEAD]) - 1
				rels[head].add(token[DEPREL])

		for i, token in enumerate(sentence):
			features = []
		
			# window of words
			features.extend(makeWindow(sentence, i, 2, 2, itemgetter(FORM)))

			features.append(token[FEATS])
			
			# window of pos tags
			features.extend(makeWindow(sentence, i, 2, 2, itemgetter(POSTAG)))

			# pos conjunctions
			features.append("^".join(makeWindow(sentence, i, 1, 0, itemgetter(POSTAG))))
			features.append("^".join(makeWindow(sentence, i, 0, 1, itemgetter(POSTAG))))

			features.append("^".join(makeWindow(sentence, i, 2, 0, itemgetter(POSTAG))))
			features.append("^".join(makeWindow(sentence, i, 0, 2, itemgetter(POSTAG))))

			## distance to comma
			#left = map(itemgetter(FORM), sentence[:i])
			#right = map(itemgetter(FORM), sentence[i+1:])
			#left.reverse()
			#if "," in left:
			#	features.append(str(left.index(",")))
			#else:
			#	features.append("-")

			#if "," in right:
			#	features.append(str(right.index(",")))
			#else:
			#	features.append("-")

			## bag-of-POS
			#left = map(itemgetter(CPOSTAG), sentence[:i])
			#left.reverse()
			#right = map(itemgetter(CPOSTAG), sentence[i+1:])
			#for tag in ["CC", "CD", "DT", "EX", "FW", "IN", "JJ", "MD",
			#            "NN", "PD", "PO", "PR", "RB", "RP", "SY", "TO",
			#            "UH", "VB", "WD", "WP", "WR"]:
			#	features.append(str(left.index(tag)) if tag in left else "-")
			#	features.append(str(right.index(tag)) if tag in right else "-")

			print " ".join(features), "|".join(sorted(rels[i])) or "__"

		print
Ejemplo n.º 4
0
def main_(options, args):
    """Run the CKY parser over every input sentence and save the result.

    Parameters:
        options -- must provide ``dep`` and ``out`` (both required),
            ``dir`` and ``mod`` (optional instance files) and
            ``non_projective``; it is also passed on to
            ``csiparse.formulateWCSP``.
        args -- list of input files for ``fileinput.input``.

    Each sentence is turned into constraints by ``csiparse.formulateWCSP``,
    parsed by a new ``cky.CKYParser`` and written to ``options.out``, one
    token per line with its fields joined by single spaces.

    Fixes over the previous version: the output file used to be reopened
    in "w" mode for every sentence, which discarded all but the last
    sentence, and none of the files were ever closed.  The output is now
    opened once (on the first sentence), sentences are separated by a blank
    line, and every file is closed in a ``finally`` clause.

    Raises AssertionError when ``options.dep`` or ``options.out`` is falsy.
    """
    assert options.dep
    assert options.out

    opened = []
    outfile = None
    try:
        pairsStream = open(options.dep)
        opened.append(pairsStream)
        pairsIterator = csiparse.instanceIterator(pairsStream)

        dirIterator = None
        if options.dir:
            dirStream = open(options.dir)
            opened.append(dirStream)
            dirIterator = csiparse.instanceIterator(dirStream)

        relsIterator = None
        if options.mod:
            relsStream = open(options.mod)
            opened.append(relsStream)
            relsIterator = csiparse.instanceIterator(relsStream)

        for sentence in sentenceIterator(fileinput.input(args)):
            if outfile is None:
                # First sentence: create/truncate the output exactly once.
                outfile = open(options.out, "w")
            else:
                # Later sentences: blank separator line, matching the
                # sentence delimiter printed by the sibling scripts.
                outfile.write("\n")

            # Only the constraints are needed; the domains are ignored.
            constraints = csiparse.formulateWCSP(sentence, dirIterator,
                                                 relsIterator,
                                                 pairsIterator, options)[1]

            parser = cky.CKYParser(len(sentence))
            for constraint in constraints:
                parser.addConstraint(constraint)

            chart = parser.parse()

            # Append two None placeholders to tokens that cannot yet be
            # indexed at DEPREL.
            for token in sentence:
                if len(token) <= DEPREL:
                    token.extend([None, None])

            # NOTE(review): presumably fills the tokens in from the chart;
            # verify against rightComplete.  len(sentence) equals the
            # original expression len(sentence) - 1 + 1.
            rightComplete(chart, 0, len(sentence), sentence)

            if options.non_projective:
                approxNonProjective(sentence, parser)

            for token in sentence:
                outfile.write(" ".join(map(str, token)) + "\n")
    finally:
        if outfile is not None:
            outfile.close()
        for stream in opened:
            stream.close()
Ejemplo n.º 5
0
def main(options, args):
    """Call formulateWCSP on every sentence of the input files.

    Parameters:
        options -- passed through unchanged to ``formulateWCSP``.
        args -- the first three entries are the paths of the dir, rels and
            pairs instance files (in that order); the remaining entries are
            the input files given to ``fileinput.input``.

    The three instance files are now closed even when opening a later file
    or processing a sentence raises; previously they were closed only on
    the success path.

    Raises ValueError when ``args`` has fewer than three entries.
    """
    dirOutput, relsOutput, pairsOutput = args[:3]

    streams = []
    try:
        for path in (dirOutput, relsOutput, pairsOutput):
            streams.append(open(path))

        dirIterator, relsIterator, pairsIterator = [
            instanceIterator(stream) for stream in streams]

        for sentence in sentenceIterator(fileinput.input(args[3:])):
            # The return value is not used here.
            formulateWCSP(sentence, dirIterator, relsIterator, pairsIterator,
                          options)
    finally:
        for stream in streams:
            stream.close()
Ejemplo n.º 6
0
def main(args):
    for sentence in sentenceIterator(fileinput.input()):
        for i, token in enumerate(sentence):
            features = []

            #if i == 0:
            #	features.append("FIRST")
            #elif i + 1 == len(sentence):
            #	features.append("LAST")
            #else:
            #	features.append("MIDDLE")

            # window of words
            features.extend(makeWindow(sentence, i, 2, 2, itemgetter(FORM)))

            # window of pos tags
            features.extend(makeWindow(sentence, i, 2, 2, itemgetter(POSTAG)))

            # window of word-tag conjunctions
            features.extend(
                makeWindow(sentence, i, 2, 2, lambda x: "%s^%s" %
                           (x[FORM], x[POSTAG])))

            # pos conjunctions
            features.append("^".join(
                makeWindow(sentence, i, 1, 0, itemgetter(POSTAG))))
            features.append("^".join(
                makeWindow(sentence, i, 0, 1, itemgetter(POSTAG))))

            #features.append("^".join(makeWindow(sentence, i, 2, 0, itemgetter(POSTAG))))
            #features.append("^".join(makeWindow(sentence, i, 0, 2, itemgetter(POSTAG))))

            #features.append(token[FEATS])
            features.extend(makeWindow(sentence, i, 1, 1, itemgetter(FEATS)))

            if token[HEAD] == "0":
                dir = "ROOT"
            else:
                dir = ["LEFT", "RIGHT"][int(token[ID]) < int(token[HEAD])]

            print " ".join(features), dir

        print
Ejemplo n.º 7
0
def main(args):
	for sentence in sentenceIterator(fileinput.input()):
		for i, token in enumerate(sentence):
			features = []

			#if i == 0:
			#	features.append("FIRST")
			#elif i + 1 == len(sentence):
			#	features.append("LAST")
			#else:
			#	features.append("MIDDLE")

			# window of words
			features.extend(makeWindow(sentence, i, 2, 2, itemgetter(FORM)))

			# window of pos tags
			features.extend(makeWindow(sentence, i, 2, 2, itemgetter(POSTAG)))

			# window of word-tag conjunctions
			features.extend(makeWindow(sentence, i, 2, 2, lambda x: "%s^%s" % (x[FORM], x[POSTAG])))

			# pos conjunctions
			features.append("^".join(makeWindow(sentence, i, 1, 0, itemgetter(POSTAG))))
			features.append("^".join(makeWindow(sentence, i, 0, 1, itemgetter(POSTAG))))

			#features.append("^".join(makeWindow(sentence, i, 2, 0, itemgetter(POSTAG))))
			#features.append("^".join(makeWindow(sentence, i, 0, 2, itemgetter(POSTAG))))


			#features.append(token[FEATS])
			features.extend(makeWindow(sentence, i, 1, 1, itemgetter(FEATS)))
			
			if token[HEAD] == "0":
				dir = "ROOT"
			else:
				dir = ["LEFT", "RIGHT"][int(token[ID]) < int(token[HEAD])]

			print " ".join(features), dir

		print
Ejemplo n.º 8
0
def main(options, args):
    """Feed every input sentence to formulateWCSP.

    Parameters:
        options -- handed unchanged to ``formulateWCSP``.
        args -- ``args[:3]`` are the dir, rels and pairs instance file
            paths, in that order; ``args[3:]`` are the input files read
            through ``fileinput.input``.

    Every instance file that was successfully opened is closed when the
    function exits, including when an exception is raised (the earlier
    version leaked them in that case).

    Raises ValueError when fewer than three entries are supplied in
    ``args``.
    """
    dirOutput, relsOutput, pairsOutput = args[:3]

    dirStream = relsStream = pairsStream = None
    try:
        dirStream = open(dirOutput)
        relsStream = open(relsOutput)
        pairsStream = open(pairsOutput)

        dirIterator = instanceIterator(dirStream)
        relsIterator = instanceIterator(relsStream)
        pairsIterator = instanceIterator(pairsStream)

        for sentence in sentenceIterator(fileinput.input(args[3:])):
            # The result of formulateWCSP is not used by this driver.
            formulateWCSP(sentence,
                          dirIterator,
                          relsIterator,
                          pairsIterator,
                          options)
    finally:
        # Close in the original order, skipping files never opened.
        for stream in (dirStream, relsStream, pairsStream):
            if stream is not None:
                stream.close()
Ejemplo n.º 9
0
def main(options, args):
    for sentence in sentenceIterator(fileinput.input(args)):
        for dependent in sentence:
            dist = "ROOTDEP"
            features = []

            dependentId = int(dependent[ID]) - 1

            # window of words
            features.extend(
                makeWindow(sentence, dependentId, 1, 1, itemgetter(FORM)))
            #									   2, 2, itemgetter(FORM)))
            #features.extend(makeWindow(sentence, headId,
            #						   1, 1, itemgetter(FORM)))

            features.extend(["ROOT", "ROOT", "ROOT"])

            #									   2, 2, itemgetter(FORM)))

            # window of pos tags
            features.extend(
                makeWindow(sentence, dependentId, 1, 1, itemgetter(POSTAG)))
            #									   2, 2, itemgetter(POSTAG)))
            #features.extend(makeWindow(sentence, headId,
            #						   1, 1, itemgetter(POSTAG)))
            features.extend(["ROOT", "ROOT", "ROOT"])
            #									   2, 2, itemgetter(POSTAG)))

            #for id in [dependentId, headId]:
            #	window = makeWindow(sentence, id,
            #	                    2, 2, itemgetter(POSTAG))
            #	features.append("%s^%s" % tuple(window[:2]))
            #	features.append("%s^%s" % tuple(window[-2:]))

            features.append("%s^%s" % (dependent[POSTAG], "ROOT"))

            # relative position, distance
            #features.append(
            #	["LEFT", "RIGHT"][dependentId < headId])
            features.append("ROOT")
            #features.append(str(abs(dependentId - headId)))
            features.append("ROOT")

            if options.bigram:
                features.append("^".join(["ROOT", dependent[FORM]]))

            if options.featsBigram:
                features.append("^".join(["ROOT", dependent[FEATS]]))

            #posTags = map(itemgetter(CPOSTAG), sentence[min(dependentId, headId):max(dependentId, headId)])
            #for tag in ["CC", "CD", "DT", "EX", "FW", "IN", "JJ", "MD",
            #            "NN", "PD", "PO", "PR", "RB", "RP", "SY", "TO",
            #            "UH", "VB", "WD", "WP", "WR"]:
            #	features.append(str(sum(1 for t in posTags if t == tag)))

            if not options.test:
                if dependent[HEAD] == "0":
                    rel = dependent[DEPREL]
                else:
                    rel = "__"
            else:
                rel = "?"

            print " ".join(features), rel

        for dependent, head in common.pairIterator(sentence, options):
            dist = abs(int(dependent[ID]) - int(head[ID]))
            features = []

            dependentId = int(dependent[ID]) - 1
            headId = int(head[ID]) - 1

            # window of words
            features.extend(
                makeWindow(sentence, dependentId, 1, 1, itemgetter(FORM)))
            #									   2, 2, itemgetter(FORM)))
            features.extend(
                makeWindow(sentence, headId, 1, 1, itemgetter(FORM)))
            #									   2, 2, itemgetter(FORM)))

            # window of pos tags
            features.extend(
                makeWindow(sentence, dependentId, 1, 1, itemgetter(POSTAG)))
            #									   2, 2, itemgetter(POSTAG)))
            features.extend(
                makeWindow(sentence, headId, 1, 1, itemgetter(POSTAG)))
            #									   2, 2, itemgetter(POSTAG)))

            #for id in [dependentId, headId]:
            #	window = makeWindow(sentence, id,
            #	                    2, 2, itemgetter(POSTAG))
            #	features.append("%s^%s" % tuple(window[:2]))
            #	features.append("%s^%s" % tuple(window[-2:]))

            features.append("%s^%s" % (dependent[POSTAG], head[POSTAG]))

            # relative position, distance
            features.append(["LEFT", "RIGHT"][dependentId < headId])
            features.append(str(abs(dependentId - headId)))

            if options.bigram:
                features.append("^".join([head[FORM], dependent[FORM]]))

            if options.featsBigram:
                features.append("^".join([head[FEATS], dependent[FEATS]]))

            #posTags = map(itemgetter(CPOSTAG), sentence[min(dependentId, headId):max(dependentId, headId)])
            #for tag in ["CC", "CD", "DT", "EX", "FW", "IN", "JJ", "MD",
            #            "NN", "PD", "PO", "PR", "RB", "RP", "SY", "TO",
            #            "UH", "VB", "WD", "WP", "WR"]:
            #	features.append(str(sum(1 for t in posTags if t == tag)))

            if not options.test:
                if dependent[HEAD] == head[ID]:
                    rel = dependent[DEPREL]
                else:
                    rel = "__"
            else:
                rel = "?"

            print " ".join(features), rel

        if options.separateSentences:
            print
Ejemplo n.º 10
0
def main(options, args):
	for sentence in sentenceIterator(fileinput.input(args)):
		for dependent in sentence:
			dist = "ROOTDEP"
			features = []

			dependentId = int(dependent[ID]) - 1

			# window of words
			features.extend(makeWindow(sentence, dependentId,
									   1, 1, itemgetter(FORM)))
#									   2, 2, itemgetter(FORM)))
			#features.extend(makeWindow(sentence, headId,
			#						   1, 1, itemgetter(FORM)))

			features.extend(["ROOT", "ROOT", "ROOT"])
			
#									   2, 2, itemgetter(FORM)))

			# window of pos tags
			features.extend(makeWindow(sentence, dependentId,
									   1, 1, itemgetter(POSTAG)))
#									   2, 2, itemgetter(POSTAG)))
			#features.extend(makeWindow(sentence, headId,
			#						   1, 1, itemgetter(POSTAG)))
			features.extend(["ROOT", "ROOT", "ROOT"])
#									   2, 2, itemgetter(POSTAG)))

			#for id in [dependentId, headId]:
			#	window = makeWindow(sentence, id,
			#	                    2, 2, itemgetter(POSTAG))
			#	features.append("%s^%s" % tuple(window[:2]))
			#	features.append("%s^%s" % tuple(window[-2:]))

			features.append("%s^%s" % (dependent[POSTAG], "ROOT"))

			# relative position, distance
			#features.append(
			#	["LEFT", "RIGHT"][dependentId < headId])
			features.append("ROOT")
			#features.append(str(abs(dependentId - headId)))
			features.append("ROOT")

			if options.bigram:
				features.append("^".join(["ROOT", dependent[FORM]]))

			if options.featsBigram:
				features.append("^".join(["ROOT", dependent[FEATS]]))

			#posTags = map(itemgetter(CPOSTAG), sentence[min(dependentId, headId):max(dependentId, headId)])
			#for tag in ["CC", "CD", "DT", "EX", "FW", "IN", "JJ", "MD",
			#            "NN", "PD", "PO", "PR", "RB", "RP", "SY", "TO",
			#            "UH", "VB", "WD", "WP", "WR"]:
			#	features.append(str(sum(1 for t in posTags if t == tag)))
			
			if not options.test:
				if dependent[HEAD] == "0":
					rel = dependent[DEPREL]
				else:
					rel = "__"
			else:
				rel = "?"

			print " ".join(features), rel
		
		for dependent, head in common.pairIterator(sentence, options):
			dist = abs(int(dependent[ID]) - int(head[ID]))
			features = []

			dependentId = int(dependent[ID]) - 1
			headId = int(head[ID]) - 1

			# window of words
			features.extend(makeWindow(sentence, dependentId,
									   1, 1, itemgetter(FORM)))
#									   2, 2, itemgetter(FORM)))
			features.extend(makeWindow(sentence, headId,
									   1, 1, itemgetter(FORM)))
#									   2, 2, itemgetter(FORM)))

			# window of pos tags
			features.extend(makeWindow(sentence, dependentId,
									   1, 1, itemgetter(POSTAG)))
#									   2, 2, itemgetter(POSTAG)))
			features.extend(makeWindow(sentence, headId,
									   1, 1, itemgetter(POSTAG)))
#									   2, 2, itemgetter(POSTAG)))

			#for id in [dependentId, headId]:
			#	window = makeWindow(sentence, id,
			#	                    2, 2, itemgetter(POSTAG))
			#	features.append("%s^%s" % tuple(window[:2]))
			#	features.append("%s^%s" % tuple(window[-2:]))

			features.append("%s^%s" % (dependent[POSTAG], head[POSTAG]))

			# relative position, distance
			features.append(
				["LEFT", "RIGHT"][dependentId < headId])
			features.append(str(abs(dependentId - headId)))

			if options.bigram:
				features.append("^".join([head[FORM], dependent[FORM]]))

			if options.featsBigram:
				features.append("^".join([head[FEATS], dependent[FEATS]]))

			#posTags = map(itemgetter(CPOSTAG), sentence[min(dependentId, headId):max(dependentId, headId)])
			#for tag in ["CC", "CD", "DT", "EX", "FW", "IN", "JJ", "MD",
			#            "NN", "PD", "PO", "PR", "RB", "RP", "SY", "TO",
			#            "UH", "VB", "WD", "WP", "WR"]:
			#	features.append(str(sum(1 for t in posTags if t == tag)))
			
			if not options.test:
				if dependent[HEAD] == head[ID]:
					rel = dependent[DEPREL]
				else:
					rel = "__"
			else:
				rel = "?"

			print " ".join(features), rel

		if options.separateSentences:
			print