Example #1
def get_lines(filename, pickedlines, outfile):
    # Stream the file and copy the second tab-separated field of every
    # selected data line to outfile. i counts only data lines (at least
    # two tab-separated fields, no markup), so pickedlines holds indices
    # into that filtered sequence, not raw line numbers.
    i = 0
    for line in open(filename):  # file() was Python 2 only; open() works in both
        if len(line.split("\t")) > 1 and "<" not in line:
            if i in pickedlines:
                outfile.write(cf.convert_entities(line.split("\t")[1]))
            i += 1
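A minimal way to exercise get_lines; the cf stand-in below is a hypothetical stub, since the real cf module ships with the surrounding project:

import html
import sys

class cf:  # hypothetical stub for the project's cf helper module
    @staticmethod
    def convert_entities(text):
        return html.unescape(text)  # e.g. "&amp;" -> "&"

# Emit the 1st and 3rd data lines of a (hypothetical) claims.tsv to stdout.
get_lines("claims.tsv", {0, 2}, sys.stdout)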
Example #2
def cleanup(claim):
    # Normalize the claim text: decode entities, fix unicode, trim both
    # ends, strip quoting and bracketing characters, then split
    # hyphenated or slashed words into separate tokens.
    claim = cf.convert_entities(claim)
    claim = cf.convert_unicode(claim)
    claim = trim_claim_start(claim)
    claim = trim_claim_end(claim)
    claim = claim.replace(" '", " ").replace("' ", " ")
    claim = claim.replace('"', "").replace("[", "").replace("]", "")
    claim = claim.replace("(", "").replace(")", "")
    claim = claim.replace("-", " ").replace("/", " ")
    return claim
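To see what the replace chain alone does (the cf and trim helpers are project-specific, so this sketch skips them):

s = '"half-baked [claims] (allegedly)"'
s = s.replace(" '", " ").replace("' ", " ")
s = s.replace('"', "").replace("[", "").replace("]", "")
s = s.replace("(", "").replace(")", "")
s = s.replace("-", " ").replace("/", " ")
print(s)  # half baked claims allegedly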
Example #3
import nltk

def trim_statement(claim):
    # Tokenize and POS-tag the claim, then hand both views to the
    # project's trim_statement_tagged helper (c is the project's
    # entity-conversion module).
    claim = c.convert_entities(claim)
    words = nltk.word_tokenize(claim)
    taggedwords = nltk.pos_tag(words)
    trimtagged = trim_statement_tagged(words, taggedwords, claim)
    if trimtagged:
        # trim_statement_tagged yields (token, tag) pairs; keep the tokens.
        return " ".join([word[0] for word in trimtagged])
    else:
        return False
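For reference, the tokenizer/tagger pair used above produces (token, tag) pairs like these; running it needs NLTK's punkt and averaged_perceptron_tagger data packages:

import nltk
# nltk.download("punkt"); nltk.download("averaged_perceptron_tagger")
words = nltk.word_tokenize("The sky is blue.")
print(nltk.pos_tag(words))
# roughly: [('The', 'DT'), ('sky', 'NN'), ('is', 'VBZ'), ('blue', 'JJ'), ('.', '.')]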
Example #4
import fileinput
import pickle
import sys

import nltk

def main():
    # Collect (claim, POS-tagged tokens) pairs from stdin (or the files
    # named on the command line) and pickle the whole list to stdout.
    claims = []
    for line in fileinput.input():
        if "<" not in line:  # skip markup lines
            claim = line.split("\t")[1]
            claim = c.convert_entities(claim)  # c: the project's entity helper
            words = nltk.word_tokenize(claim)
            taggedwords = nltk.pos_tag(words)
            claims.append((claim, taggedwords))
    # pickle needs a binary stream on Python 3, hence stdout.buffer.
    pickle.dump(claims, sys.stdout.buffer)
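The pickled list can then be read back by a downstream stage from its own stdin, again through the binary buffer:

import pickle
import sys

claims = pickle.load(sys.stdin.buffer)  # list of (claim, taggedwords) tuples
for claim, tagged in claims:
    print(claim, len(tagged))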
Example #5
import fileinput
import sys

import nltk

def main():
    # For each input line, trim the claim and emit it together with its
    # nouns as one tab-separated output line. d and n are the project's
    # trimming and noun-extraction modules.
    for line in fileinput.input():
        claim = line.split("\t")[1].replace("\n", "")
        claim = c.convert_entities(claim)
        words = nltk.word_tokenize(claim)
        tagged = nltk.pos_tag(words)
        trimmed = d.trim_statement_tagged(words, tagged, claim)
        if trimmed:
            nouns = n.get_nouns_tagged(trimmed)
            trimclaim = " ".join([word[0] for word in trimmed])
            sys.stdout.write(trimclaim)
            for noun in nouns:
                sys.stdout.write("\t" + noun)
            sys.stdout.write("\n")
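Wired into a pipeline, this stage expects tab-separated input whose second field is the claim text (the first field is ignored) and writes one record per surviving claim; the script name below is hypothetical:

# Input line:   <id>\t<claim text>
# Output line:  <trimmed claim>\t<noun>\t<noun>...
#
#   python extract_claim_nouns.py < claims.tsv > claim_nouns.tsv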