def parse_datatype(current, infile):
    '''Parse a datatype starting at the token `current`.

    Two forms are accepted:

    * the keyword ``trit``
    * ``trit_vector ( <literal> downto <literal> )``, where the first
      literal must be greater than zero and the second must be 0

    Returns a 3-tuple ``(token, type_name, length)``: `type_name` is
    "trit" with length 0, or "trit_vector" with length equal to the first
    literal's value plus one.  `token` is whatever nextToken /
    compareTokens hands back after the datatype (presumably the next
    unconsumed token -- verify against those helpers).

    When `current` is not a Keyword the problem is reported through
    printError and, if that call returns, the function returns None.
    '''
    if not isinstance(current, Keyword):
        printError(current, Keyword("trit|trit_vector"))
        return None

    if current.name == "trit":
        return (nextToken(infile), "trit", 0)

    # Anything that is not "trit" has to be a trit_vector declaration.
    after_keyword = compareKeywords(current, Keyword("trit_vector"), infile)

    # Upper bound: the token following "(" must be a positive literal.
    high_token = compareTokens(after_keyword, Token("("), infile)
    if not isinstance(high_token, Literal):
        printError(high_token, Literal("integer"))
    elif high_token.value <= 0:
        printError(high_token, Literal("greater than zero"))

    downto_token = nextToken(infile)
    length = high_token.value + 1

    # Lower bound: the token following "downto" must be the literal 0.
    low_token = compareTokens(downto_token, Keyword("downto"), infile)
    if not isinstance(low_token, Literal):
        printError(low_token, Literal("integer"))
    elif low_token.value != 0:
        printError(low_token, Literal(0))

    closing_token = nextToken(infile)
    following_token = compareTokens(closing_token, Token(")"), infile)

    return (following_token, "trit_vector", length)
def tokenizeNumber(infile, value):
    '''tokenizeNumber: identify the next integer in the file

    Reads `infile` one character at a time, appending each digit to
    `value` (the digits collected so far, as a string), and stops at the
    first character that is not a digit.

    infile -- seekable file object positioned just after the digits
              already stored in `value`
    value  -- string of digits consumed so far

    Returns Literal(str(value)).  On return the file is positioned at the
    first non-digit character, so the caller reads it next.
    '''
    while True:
        char = infile.read(1)
        if not char.isdigit():
            break
        value = value + char

    # read(1) returns an empty string at end of file.  Only step back when
    # a character was really consumed; rewinding at EOF would un-read the
    # last digit and make the caller tokenize it again.
    if char:
        infile.seek(infile.tell() - 1)
    return Literal(str(value))
def cal_features_from_sens_write_to_file(filename_sens, filename_output):
    """Compute features for each sentence pair in a file and append them
    to an output file.

    Each non-blank line of `filename_sens` is split on tabs; the first two
    fields are taken as the sentence pair and the full field list is
    passed along as context.  For every such line one record is appended
    to `filename_output` through FileProcess.append_value_to_file,
    consisting of two tab-separated values:

        Literal.levenshtein_in_context(sen_1, sen_2, sens)
        WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 0)

    filename_sens   -- path of the tab-separated input file
    filename_output -- path handed to FileProcess.append_value_to_file

    Raises IndexError for a non-blank line without a tab, since such a
    line has no second field.
    """
    # The context manager closes the input file even when one of the
    # feature computations raises.
    with open(filename_sens, 'r') as sens_file:
        for line in sens_file:
            # Lines read from a file keep their newline, so a blank line
            # is "\n" rather than ""; skip those instead of failing on
            # the missing second field.
            if not line.rstrip("\r\n"):
                continue

            sens = line.split("\t")
            sen_1 = sens[0]
            sen_2 = sens[1]

            feature_values = "\t".join([
                str(Literal.levenshtein_in_context(sen_1, sen_2, sens)),
                str(WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 0)),
            ])
            FileProcess.append_value_to_file(feature_values, filename_output)
def cal_feature_values_for(syn_wn, syn_ox):
    """Return the list of similarity features for a pair of senses.

    syn_wn -- sense object understood by WordnetHandler
    syn_ox -- sense object understood by OxfordParser

    The result contains six values, in this order:

    0. 1 - Literal.levenshtein(definition_wn, definition_ox)
    1. 1.00001 - Literal.jaccard(definition_wn, definition_ox)
    2. 1 - Literal.levenshtein(gloss_wn, gloss_ox)
    3. 1.00001 - Literal.jaccard(gloss_wn, gloss_ox)
    4. value 3 plus Ngrams.ngrams_word_for(gloss_wn, gloss_ox, n)
       summed over n = 2, 3, 4, 5
    5. WordnetBased.wordnet_based(definition_wn, definition_ox, 0)

    Only the definitions and glosses are fetched: lemma, short definition,
    example, collocation, hypernym and meronym texts are not used by any
    of the features above.
    """
    defi_wn = WordnetHandler.get_defi_for_syn(syn_wn)
    defi_ox = OxfordParser.get_defi_for_syn(syn_ox)
    gloss_wn = WordnetHandler.get_gloss_for_syn(syn_wn)
    gloss_ox = OxfordParser.get_gloss_for_syn(syn_ox)

    feature_values = []

    # Literal features on the definitions.
    feature_values.append(1 - Literal.levenshtein(defi_wn, defi_ox))
    feature_values.append(1.00001 - Literal.jaccard(defi_wn, defi_ox))

    # Literal features on the glosses.
    feature_values.append(1 - Literal.levenshtein(gloss_wn, gloss_ox))
    literal_jacc_value = 1.00001 - Literal.jaccard(gloss_wn, gloss_ox)
    feature_values.append(literal_jacc_value)

    # Gloss Jaccard feature augmented with word n-gram scores.  The terms
    # are added one by one in increasing n so the floating-point result is
    # the same as with the unrolled additions.
    literal_jacc_ngram = literal_jacc_value
    for ngram_size in (2, 3, 4, 5):
        literal_jacc_ngram += Ngrams.ngrams_word_for(
            gloss_wn, gloss_ox, ngram_size)
    feature_values.append(literal_jacc_ngram)

    # WordNet-based feature on the definitions.
    feature_values.append(WordnetBased.wordnet_based(defi_wn, defi_ox, 0))

    return feature_values
def interpret():
    """Read proof lines from `lines` and print whether each justified
    line is valid.

    `lines` is not defined in this function; presumably it is a
    module-level sequence of text lines -- verify against the rest of the
    file.

    Each line is stripped of a trailing newline and of leading tabs (the
    tab count becomes the line's indent), then split into tokens on
    spaces with '(' and ')' separated out as tokens of their own.  The
    first token decides how the line is handled:

    * '#'    -- the remaining tokens are stored with indent 0
    * '-'    -- the remaining tokens are stored with the line's indent
    * 'STOP' -- processing ends
    * otherwise the last three tokens are the justification: a connective
      (AND, OR, THEN, IFF, NOT, FALSE), INTRO or ELIM, and a
      comma-separated list of 1-based line references (or a START-END
      range, used by THEN INTRO and NOT INTRO).  The statement itself is
      parts[:-4], so the token just before the justification is dropped
      (presumably a separator -- TODO confirm).

    Every stored line is a Literal.Literal(tokens, indent, False).  When a
    check succeeds the newest entry gets `valid = True`.  All results are
    reported with print; nothing is returned.
    """
    #keep track of values
    proof = []
    #line by line values
    indent = 0
    last_indent = 0  # not used anywhere in the code shown here
    #error checking
    paren = 0  # not used anywhere in the code shown here
    incomplete = False  # not used anywhere in the code shown here
    for lnum, line in enumerate(lines):
        indent = 0
        #error checking?
        #parsing
        # NOTE(review): line[-1] and line[0] raise IndexError on an empty
        # string, e.g. a last line without newline or a blank line once
        # its newline has been removed.
        if line[-1] == '\n':
            line = line[:-1]
        # count and strip leading tabs
        while line[0] == '\t':
            indent += 1
            line = line[1:]
        line = line.split(' ')
        # drop the empty strings produced by consecutive spaces
        while '' in line:
            line.remove('')
        parts = []
        for i, part in enumerate(line):
            part = part.split('(')
            # put one '(' back for every '(' removed by the split; they
            # are inserted at the even positions 0, 2, 4, ...
            for x in xrange(len(part) - 1):
                part.insert(2 * x, '(')
            for j, p in enumerate(part):
                p = p.split(')')
                # the index 2 * len(p) + 1 is always past the end of the
                # list, so each insert appends a ')' at the end
                for x in xrange(len(p) - 1):
                    p.insert(2 * len(p) + 1, ')')
                for l in p:
                    if l != '':
                        parts.append(l)
        print parts
        if parts[0] == '#':
            proof.append(Literal.Literal(parts[1:], 0, False))
        elif parts[0] == '-':
            proof.append(Literal.Literal(parts[1:], indent, False))
        elif parts[0] == 'STOP':
            break
        else:
            # reason = [connective, INTRO/ELIM, reference, reference, ...]
            reason = parts[-3:-1] + parts[-1].split(',')
            proof.append(Literal.Literal(parts[:-4], indent, False))
            #deal with reason dashes
            sub = []
            if '-' in reason[-1]:
                # NOTE(review): `l` is the variable left over from the
                # tokenizing loop above (the last piece of the last
                # token), not reason[-1] -- confirm this is intended.
                sub = [int(x) for x in l.split('-')]
            ind = indent
            # brk records whether a loop below was left early; what that
            # means differs between the branches
            brk = False
            if reason[0] == "AND":
                if reason[1] == "INTRO":
                    # every referenced line must pass andintro against
                    # the new line
                    for l in reason[2:]:
                        if not proof[int(l) - 1].andintro(proof[-1]):
                            print "Invalid line", lnum
                            brk = True
                            break
                    if not brk:
                        proof[-1].valid = True
                        print "VALID AND INTRO", lnum
                elif reason[1] == "ELIM":
                    for l in reason[2:]:
                        if not proof[int(l) - 1].andelim(proof[-1]):
                            print "Invalid line", lnum
                            brk = True
                            break
                    if not brk:
                        proof[-1].valid = True
                        print "VALID AND ELIM", lnum
            elif reason[0] == "OR":
                if reason[1] == "INTRO":
                    for l in reason[2:]:
                        if not proof[int(l) - 1].orintro(proof[-1]):
                            print "Invalid line", lnum
                            brk = True
                            break
                    if not brk:
                        proof[-1].valid = True
                        print "VALID OR INTRO", lnum
                elif reason[1] == "ELIM":
                    for l in reason[2:]:
                        if not proof[int(l) - 1].orelim(proof[-1]):
                            print "Invalid line", lnum
                            brk = True
                            break
                    if not brk:
                        proof[-1].valid = True
                        print "VALID OR ELIM", lnum
            elif reason[0] == "THEN":
                if reason[1] == "INTRO":
                    # sub holds the START-END range of the referenced
                    # lines; it is only filled when the reference
                    # contains a '-', otherwise sub[0] raises IndexError
                    part1 = False
                    brk = False
                    # first check: the first line of the range against
                    # the new line (mode 0)
                    if proof[sub[0] - 1].thenintro(proof[-1], 0):
                        part1 = True
                    if part1:
                        # second check: look for a line one level deeper
                        # than the new line that passes thenintro in
                        # mode 1; here brk = True means "found"
                        for ln in proof[sub[0] - 1:sub[1]]:
                            print "CHECK", ln
                            if ln.indent == ind + 1:
                                if ln.thenintro(proof[-1], 1):
                                    brk = True
                                    print "brk"
                                    break
                            elif ln.indent < ind:
                                print "not brk"
                                break
                    if not brk:
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID THEN INTRO", lnum
                elif reason[1] == "ELIM":
                    if not proof[int(reason[2]) - 1].thenelim(
                            proof[int(reason[3]) - 1], proof[-1]):
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID THEN ELIM", lnum
            elif reason[0] == "IFF":
                if reason[1] == "INTRO":
                    if not proof[-1].iffintro(proof[int(reason[2]) - 1],
                                              proof[int(reason[3]) - 1]):
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID IFF INTRO", lnum
                elif reason[1] == "ELIM":
                    if not proof[int(reason[2]) - 1].iffelim(
                            proof[int(reason[3]) - 1], proof[-1]):
                        print "Invalid line", lnum
                        brk = True
                        # NOTE(review): this break leaves the loop over
                        # `lines`, so processing stops here, unlike most
                        # other rules -- confirm this is intended.
                        break
                    else:
                        proof[-1].valid = True
                        print "VALID IFF ELIM", lnum
            elif reason[0] == "NOT":
                if reason[
                        1] == "INTRO":
                    # same scheme as THEN INTRO, except that the range
                    # has to contain a line equal to "false" and the
                    # first check uses notintro (per the original note:
                    # proof[-1] is the negation of proof[sub[0] - 1])
                    part1 = False
                    brk = False
                    if proof[sub[0] - 1].notintro(proof[-1]):
                        part1 = True
                    if part1:
                        for ln in proof[sub[0] - 1:sub[1]]:
                            print "CHECK", ln
                            if ln.indent == ind + 1:
                                if ln.equals("false"):
                                    brk = True
                                    print "brk"
                                    break
                            elif ln.indent < ind:
                                print "not brk"
                                break
                    if not brk:
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID NOT INTRO", lnum
                elif reason[1] == "ELIM":
                    if not proof[int(reason[2]) - 1].notelim(proof[-1]):
                        print "Invalid line", lnum
                        brk = True
                        # NOTE(review): as in IFF ELIM, this break stops
                        # processing of all remaining lines.
                        break
                    else:
                        proof[-1].valid = True
                        # NOTE(review): the message says FALSE ELIM in
                        # the NOT ELIM branch -- possibly a copy/paste
                        # slip; confirm.
                        print "VALID FALSE ELIM", lnum
            elif reason[0] == "FALSE":
                if reason[1] == "INTRO":
                    if not proof[int(reason[2]) - 1].falseintro(
                            proof[int(reason[3]) - 1]):
                        print "Invalid line", lnum
                    # NOTE(review): brk is never set in this branch, so
                    # the line is marked valid even right after "Invalid
                    # line" was printed -- confirm whether an else was
                    # intended.
                    if not brk:
                        proof[-1].valid = True
                        print "VALID FALSE INTRO", lnum
                elif reason[1] == "ELIM":
                    if not proof[-1].falseelim(proof[int(reason[2]) - 1]):
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID FALSE ELIM", lnum