Esempio n. 1
0
def main():
    """Run the CYK algorithm and write its results to ``out-status.txt``.

    ``CYK().algoritmo_cyk()`` is expected to return a mapping; each of its
    values is written as one line, with the items converted via ``str`` and
    separated by single spaces (no trailing space).
    """
    resultados = CYK().algoritmo_cyk()

    with open('out-status.txt', 'w') as writer:
        for resultado in resultados.values():
            # str.join puts separators only between items, which replaces
            # the manual "is this the last index?" bookkeeping.
            writer.write(' '.join(map(str, resultado)) + '\n')
Esempio n. 2
0
 def test10(self):
     """Print what ``CYK.is_in_cfl`` returns for 'ab' after ``_use_grammar3()``."""
     print("===== Test 10 =====")
     self._use_grammar3()
     word = 'ab'
     print('Input string: ' + word)
     verdict = CYK.is_in_cfl(word, self._grammar)
     print('Result = ' + str(verdict))
Esempio n. 3
0
 def test1a(self):
     """Print what ``CYK.is_in_cfl`` returns for 'ab' after ``_use_grammar1()``."""
     print("===== Test 1a =====")
     self._use_grammar1()
     # Debug aid, currently disabled: self._grammar.display()
     word = 'ab'
     print('Input string: ' + word)
     verdict = CYK.is_in_cfl(word, self._grammar)
     print('Result = ' + str(verdict))
Esempio n. 4
0
               help='Input sentence (text to parse)')
args = p.parse_args()

# Read the corpus file into a list, one entry per line (newlines are kept).
with open("data/sequoia-corpus+fct.mrg_strict", "r") as file_corpus:
    corpus = list(file_corpus)

# The first 80% of the lines (rounded to the nearest int) form the training
# split.
frac_train = 0.8
N = len(corpus)
nb_train = int(round(N * frac_train))
corpus_train = corpus[:nb_train]

# Build the parser from the training split only.
print("Building PCFG and Parser")
my_CYK_parser = CYK(corpus_train)
print("Done")

print("Start Parsing")

if args.test_sentence:
    sent = args.test_sentence
    print("#################")
    print("Sentence: ")
    print(sent + "\n")

    print("Parsing")
    my_parsing = my_CYK_parser.parse(sent)
    if my_parsing is None:
        print("Found no viable parsing.")
    else:
Esempio n. 5
0
# Size of the test share: 10% of the corpus, truncated to an int.
length_test = int(0.1 * corpus_length)

# Index at which the dev split ends and the test split begins.
end_dev = length_train + length_dev

# Consecutive slices: train first, then dev, then everything that remains.
# NOTE(review): corpus_test takes the remainder rather than exactly
# length_test items - confirm that is intended.
corpus_train = sentences[:length_train]
corpus_dev = sentences[length_train:end_dev]
corpus_test = sentences[end_dev:]

# Read the unparsed evaluation sentences (one per line) from eval_corpus.txt.
with open('data/eval_corpus.txt', 'r') as f:
    file = f.read()
# Splitting needs only the text, so it happens after the handle is closed.
# NOTE(review): a trailing newline in the file leaves a final '' entry here.
test_sentences = file.split('\n')

# Train the parser on the training split.
print('Building the parser...')
cyk_parser = CYK(corpus_train)
print('Done')

# Parse every evaluation sentence; sentences whose parse is None are left out
# of both the output file and test_sentences_bis.
print('Parsing...')

test_sentences_bis = []

with open('data/evaluation_data.parser.txt', 'w') as f:
    for sentence in test_sentences:
        parsed_sentence = cyk_parser.parse(sentence)
        if parsed_sentence is None:
            continue
        test_sentences_bis.append(sentence)
        f.write('%s\n' % parsed_sentence)

print('Done')
Esempio n. 6
0
from cyk import CYK

startstate = "S"

# Productions: each left-hand symbol maps to its list of right-hand sides.
# NOTE(review): multi-letter names such as "Va"/"Vb" are concatenated inside
# the strings - presumably CYK tokenises them; confirm against cyk.py.
grammar = {
    startstate: ["VaE", "VbF"],
    "G": ["GG", "a", "b", "VaVb"],
    "E": ["GVa", "a"],
    "F": ["GVb", "b"],
    "Va": ["a"],
    "Vb": ["b"],
}

word = "abacba"

cyk = CYK(grammar, startstate)

# Print the value returned by checkWord, then have the parser output its table.
print(cyk.checkWord(word))
cyk.outputTable(word)
Esempio n. 7
0
    T = tagged_sent_to_tree(human_parsing, remove_after_hyphen=True)
    real_parsing = tree_to_sentence(T)
    real_parsings_test.append(real_parsing)

    sent = tagged_to_sentence(real_parsing)
    sentences_test.append(sent)

# Write the test sentences to disk, one per line.
with open('results/sentences_test.txt', 'w') as f:
    f.writelines("%s\n" % item for item in sentences_test)

# Build the CYK parser from dataset["train"] and report the wall-clock time
# spent doing so.

print("Build CYK parser")
tic = time.time()
my_CYK_parser = CYK(dataset["train"])
tac = time.time()
elapsed = round(tac - tic, 2)
print("Done in " + str(elapsed) + "sec\n")

# Use pyevalb
# Sanity check: both test lists must hold exactly nb_test entries.
assert len(sentences_test) == nb_test
assert len(real_parsings_test) == nb_test

for idx_sentence in range(nb_test):

    print("##############################")

    real_parsing = real_parsings_test[idx_sentence]
    sent = sentences_test[idx_sentence]

    print("Sentence #" + str(idx_sentence))
Esempio n. 8
0
    print(len(treeList))
    # print (MATRIX.shape)
    print(treeList[0])

    # run the exps cycling through different parameters
    for dimension in [8192]:
        # print ('dimension: ', dimension)
        for LAMBDA in [0.6]:
            for filter in [1.5]:
                # print ('filter: ', filter)

                # creating parser instance
                if MODE == "binary":
                    cykInstance = CYK(dimension,
                                      LAMBDA,
                                      Grammar,
                                      filter=filter)
                else:
                    cykInstance = CYKPlus(dimension,
                                          LAMBDA,
                                          Grammar,
                                          filter=filter)

                # creating or loading the appropriate matrix of distributed trees
                # matrix = pickle.load("matrixFile", "rb")
                for distortRate in [0]:
                    results = runExp(treeList,
                                     cykInstance,
                                     k_best=2,
                                     k_max=2,
                                     matrix=MATRIX,
Esempio n. 9
0
    # # loading grammar (binarized)
    # Grammar = pickle.load(open("binaryGrammar.txt", "rb"))
    # Load the grammar through a context manager so the file handle is closed
    # right after unpickling instead of being left to the garbage collector.
    # NOTE(review): pickle.load can execute arbitrary code - only load
    # grammar files that come from a trusted source.
    with open("binaryGrammar23.txt", "rb") as grammar_file:
        Grammar = pickle.load(grammar_file)
    #
    # # defining parser and dtk parameters
    # dimension = 1024
    # filter = 1.5
    # LAMBDA = 0.6

    # path = '/Users/lorenzo/Documents/Universita/PHD/Lavori/Codice/pyCYK/'

    for dimension in [1024, 2048, 4096, 8192, 16384]:
        for LAMBDA in [0.6]:
            for filter in [1.5, 2, 2.5]:
                parser = CYK(dimension, LAMBDA, Grammar, filter=filter)

                # filename
                # f_bin = 'binaryoutput_{0}_{1}'.format(dimension, filter)
                # f_debin = 'debinarized_{0}_{1}'.format(dimension, filter)

                # # parsing and saving
                parsingAndSaving(treeListBinarized, "binaryreconstructed.txt",
                                 parser)

                ## loading java from python -> produces debinarized.txt
                os.system(javaString)

                print('\n parametri: dimensione={0}, filter={1}'.format(
                    dimension, filter))
Esempio n. 10
0
    # for OOV
    oovwords = OOV(embedfilename, vocabulary)

    # parse new sentences using CYK based on learned PCFG
    # parser = CYKSolver(lexicon, grammar, vocabulary, symbols, oovwords)

    # Parse each line read from stdin and print the resulting tree on one
    # line in bracketed form.
    for line in sys.stdin:
        # CYK receives the whitespace-split tokens plus the learned model
        # pieces; presumably it returns None when no parse was found.
        parsedtree = CYK(line.split(), lexicon, grammar, vocabulary, symbols,
                         oovwords)
        # Identity test: None is a singleton, and "==" could be intercepted
        # by a custom __eq__ on the tree type.
        if parsedtree is None:
            print('(None)')
            continue
        # Convert the tree back out of Chomsky normal form before printing.
        parsedtree.un_chomsky_normal_form()
        print('( ' +
              parsedtree._pformat_flat(nodesep='', parens='()', quotes=False) +
              ')')