Example 1
import unittest

from lexer import Lexer
from parserr import Parser


class Test(unittest.TestCase):
    def test2(self):
        # Parse a single atomic proposition; the parser returns a flat
        # list of grammar nodes identified by their kind.
        lexerlist = Lexer('Q').tokenize(1)
        parserlist = Parser().parse(lexerlist)
        self.assertEqual(parserlist[0].kind, "propositions")
        self.assertEqual(parserlist[1].kind, "proposition")
        self.assertEqual(parserlist[2].kind, "atomic")
        self.assertEqual(parserlist[3].kind, "ID")
        self.assertEqual(parserlist[4].kind, "more-proposition")
        self.assertEqual(parserlist[5].kind, "epsilon")
Example 2
import numpy as np
import tensorflow as tf
import random
import sys, os
import json
import argparse
from parserr import Parser
from datamanager import DataManager
from actor import ActorNetwork
from LSTM_critic import LSTM_CriticNetwork
tf.logging.set_verbosity(tf.logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# parse command-line arguments
argv = sys.argv[1:]
parser = Parser().getParser()
args, _ = parser.parse_known_args(argv)
random.seed(args.seed)

# load the dataset splits and pre-trained word vectors
dataManager = DataManager(args.dataset)
train_data, dev_data, test_data = dataManager.getdata(args.grained, args.maxlenth)
word_vector = dataManager.get_wordvector(args.word_vector)

# optionally shrink the splits for a quick smoke test
if args.fasttest == 1:
    train_data = train_data[:100]
    dev_data = dev_data[:20]
    test_data = test_data[:20]
print("train_data", len(train_data))
print("dev_data", len(dev_data))
print("test_data", len(test_data))
Example 3
import unittest

from lexer import Lexer
from parserr import Parser

# Read the propositions to check, one per line, from a user-supplied file.
with open(input("Enter Filename: "), 'r') as file:
    data = file.readlines()

currentline = 1

for lines in data:
    print("Proposition: " + lines.rstrip())
    tokenlist = Lexer(lines.rstrip()).tokenize(currentline)
    print("Lexer: ", tokenlist)
    grammarlist = Parser().parse(tokenlist)
    print("Parser:", grammarlist)
    print()
    currentline += 1

'''
class Test(unittest.TestCase):
    def test1(self):
        tokenlist = Lexer('Q').tokenize(1)
        self.assertEqual(tokenlist[0].kind, "ID")
        self.assertEqual(tokenlist[0].loc.col, 1)
        self.assertEqual(tokenlist[0].loc.line, 1)

    def test2(self):
        tokenlist = Lexer('Q').tokenize(1)
        grammarlist = Parser().parse(tokenlist)
'''
Example 4
import os, sys, unittest

from lexer import Lexer
from parserr import Parser
from smtbuilder import SMTbuilder

with open(sys.argv[1], "r") as file:
    data = file.readlines()

currentline = 1

for lines in data:
    lexerlist = Lexer(lines.rstrip()).tokenize(currentline)
    parserlist = Parser().parse(lexerlist)

    if "Syntax Error" not in parserlist:  # no grammar error found
        # Generate output.py from the token list and run it by importing
        # it.  Python caches modules, so the generated file is only
        # executed on the first import (see the reload sketch below).
        SMTbuilder("output.py").build(lexerlist)
        import output
    else:
        print(parserlist)  # prints error

    currentline += 1


class Test(unittest.TestCase):
    def test1(self):
        lexerlist = Lexer('Q').tokenize(1)
        self.assertEqual(lexerlist[0].kind, "ID")
        self.assertEqual(lexerlist[0].loc.col, 1)
        self.assertEqual(lexerlist[0].loc.line, 1)
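Because of the module cache noted above, `import output` will not re-execute a regenerated output.py on later loop iterations. If every input line should run its own generated file, an explicit reload would be needed; a minimal sketch, using the same variables as the loop above:

import importlib

SMTbuilder("output.py").build(lexerlist)
if 'output' in sys.modules:
    importlib.reload(sys.modules['output'])  # re-execute the regenerated file
else:
    import output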
Example 5
from grammar import Grammar
from grammar_menu import UI
from parserr import Parser


def read_pif(file_path):
    # Collect the left-hand side of every "lhs -> rhs" line in the file.
    lst = []
    with open(file_path, 'r') as file:
        for line in file.readlines():
            elem = line.split("->")[0].strip()
            lst.append(elem)
    return lst
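read_pif keeps only what precedes the `->` separator on each line. For instance, if p1.in contained the two lines below (illustrative contents, not the actual file), read_pif("p1.in") would return ['identifier', 'constant']:

    identifier -> 0
    constant -> 1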


if __name__ == "__main__":
    # grammar = Grammar("g_test.in")
    # ui = UI(grammar)
    # ui.run()

    lst = read_pif("p1.in")
    print(lst)

    grammar = Grammar("g1.in")

    parser = Parser(grammar)
    w = ["a", "a", "c", "b", "c"]  # the input word sequence to parse
    parser.run(w)
Example 6
class Summarizer:
    def __init__(self):
        self.parser = Parser()

    def summarize(self, text, title, source, category):
        sentences = self.parser.splitSentences(text)
        titleWords = self.parser.removePunctations(title)
        titleWords = self.parser.splitWords(titleWords)
        (keywords, wordCount) = self.parser.getKeywords(text)

        topKeywords = self.getTopKeywords(keywords[:10], wordCount, source,
                                          category)

        result = self.computeScore(sentences, titleWords, topKeywords)
        result = self.sortScore(result)

        return result

    def getTopKeywords(self, keywords, wordCount, source, category):
        # TODO: also fetch top keywords from the database here
        for keyword in keywords:
            # Relative in-article frequency of the keyword, weighted 1.5.
            articleScore = 1.0 * keyword['count'] / wordCount
            keyword['totalScore'] = articleScore * 1.5

        return keywords

    def sortScore(self, dictList):
        return sorted(dictList, key=lambda x: -x['totalScore'])

    def sortSentences(self, dictList):
        return sorted(dictList, key=lambda x: x['order'])

    def computeScore(self, sentences, titleWords, topKeywords):
        keywordList = [keyword['word'] for keyword in topKeywords]
        summaries = []

        for i, sentence in enumerate(sentences):
            sent = self.parser.removePunctations(sentence)
            words = self.parser.splitWords(sent)

            sbsFeature = self.sbs(words, topKeywords, keywordList)
            dbsFeature = self.dbs(words, topKeywords, keywordList)

            titleFeature = self.parser.getTitleScore(titleWords, words)
            sentenceLength = self.parser.getSentenceLengthScore(words)
            sentencePosition = self.parser.getSentencePositionScore(
                i, len(sentences))
            keywordFrequency = (sbsFeature + dbsFeature) / 2.0 * 10.0
            # Weighted average of the four sentence features.
            totalScore = (titleFeature * 1.5 + keywordFrequency * 2.0 +
                          sentenceLength * 0.5 + sentencePosition * 1.0) / 4.0

            summaries.append({
                # 'titleFeature': titleFeature,
                # 'sentenceLength': sentenceLength,
                # 'sentencePosition': sentencePosition,
                # 'keywordFrequency': keywordFrequency,
                'totalScore': totalScore,
                'sentence': sentence,
                'order': i
            })

        return summaries

    def sbs(self, words, topKeywords, keywordList):
        # Summation-based selection: sum the scores of the top keywords
        # appearing in the sentence, normalized by sentence length.
        score = 0.0

        if len(words) == 0:
            return 0

        for word in words:
            word = word.lower()
            index = -1

            if word in keywordList:
                index = keywordList.index(word)

            if index > -1:
                score += topKeywords[index]['totalScore']

        return 1.0 / len(words) * score

    def dbs(self, words, topKeywords, keywordList):
        # Density-based selection: reward pairs of keywords that occur
        # close together; k counts the distinct keywords in the sentence.
        k = len(list(set(words) & set(keywordList))) + 1
        summ = 0.0
        firstWord = {}
        secondWord = {}

        for i, word in enumerate(words):
            if word in keywordList:
                index = keywordList.index(word)

                if firstWord == {}:
                    firstWord = {
                        'i': i,
                        'score': topKeywords[index]['totalScore']
                    }
                else:
                    secondWord = firstWord
                    firstWord = {
                        'i': i,
                        'score': topKeywords[index]['totalScore']
                    }
                    distance = firstWord['i'] - secondWord['i']

                    summ += (firstWord['score'] *
                             secondWord['score']) / (distance**2)

        # Normalization is 1 / (k * (k + 1)).
        return (1.0 / (k * (k + 1.0))) * summ
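Assuming the Parser helper supplies the text utilities used above (splitSentences, splitWords, getKeywords, and the scoring helpers), a minimal usage sketch might look like this; the article text and metadata are illustrative:

if __name__ == "__main__":
    summarizer = Summarizer()
    text = ("The first sentence of the article. "
            "Another sentence repeating the title words. "
            "A closing sentence.")
    # source and category are passed through to getTopKeywords unchanged.
    result = summarizer.summarize(text, "Article title", "example-source",
                                  "example-category")
    # sortScore puts the highest-scoring sentences first.
    for entry in result[:2]:
        print(entry['totalScore'], entry['sentence'])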