Example #1
0
 def __init__(self):
     """Create the parser, URLs, session, cache and file driver.

     The starting URL id is the cache's ``last_id`` converted to ``int``
     when that value is truthy; otherwise it is ``crawl_start_id()``.
     """
     self.parser = MyParser()
     self.__base_url = base_url()
     self.__login_url = login_url()
     self.__failures = 0
     self.__session = self.open_session()
     self.cache = Cache()
     self.file_io_driver = FileIODriver()
     if self.cache.last_id:
         self.current_url_id = int(self.cache.last_id)
     else:
         self.current_url_id = crawl_start_id()
     self.current_url = ''
Example #2
0
    def __init__(self):
        """Build the generated UI, create helper objects and connect signals."""
        super(MainWindow, self).__init__()

        # Generated UI and window chrome.
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        self.setWindowTitle('Програма для морфологічного аналізу слів, та вирішування тестів')

        # Collaborators: word parser plus the auxiliary windows.
        self.parser_obj = MyParser()
        self.another = Another()
        self.ab = About()
        self.settings = Settings()

        # Signal wiring: solve button, menu actions, settings checkbox.
        self.ui.pushButton_solve.clicked.connect(self.solve)
        self.ui.action.triggered.connect(self.another.instruction)
        self.ui.action_about.triggered.connect(self.ab.about)
        self.ui.action_3.triggered.connect(self.settings.show_set)
        self.settings.ui.checkBox.clicked.connect(self.hide_predict)

        self.show_message('Програма не гарантує 100% правильність відповіді')
Example #3
0
def main():
    """Build a ``MyParser`` from the command-line arguments and run it.

    Four positional arguments are expected after the program name, in this
    order: function definitions, constant definitions, input file, output
    file. Extra arguments are ignored.

    Raises:
        SystemExit: with a usage message when fewer than four arguments
            are supplied (previously this surfaced as a bare IndexError).
    """
    # Fetch the command-line arguments.
    args = sys.argv
    if len(args) < 5:
        program = args[0] if args else 'prog'
        sys.exit('usage: {} FUNCS DEFINES INFILE OUTFILE'.format(program))

    # Function definitions, constant definitions, input file, output file.
    funcs, defines, infile, outfile = args[1:5]

    # Lexical analysis: the parser is fed by a lexer reading the input file.
    parser = MyParser(Lexer(LexerReader(infile)), funcs, defines)
    # Syntax analysis; the output file argument is handed to parse().
    parser.parse(outfile)
Example #4
0
def main():
    """Build a ``MyParser`` from the command-line arguments and run it.

    Four positional arguments are expected after the program name, in this
    order: function definitions, constant definitions, input file, output
    file. Extra arguments are ignored.

    Raises:
        SystemExit: with a usage message when fewer than four arguments
            are supplied (previously this surfaced as a bare IndexError).
    """
    # Fetch the command-line arguments.
    args = sys.argv
    if len(args) < 5:
        program = args[0] if args else 'prog'
        sys.exit('usage: {} FUNCS DEFINES INFILE OUTFILE'.format(program))

    # Function definitions, constant definitions, input file, output file.
    funcs, defines, infile, outfile = args[1:5]

    # Lexical analysis: the parser is fed by a lexer reading the input file.
    parser = MyParser(Lexer(LexerReader(infile)), funcs, defines)
    # Syntax analysis; the output file argument is handed to parse().
    parser.parse(outfile)
def main(url):
    """Tag the article text obtained for *url* and print the results.

    Prints the article text, every entity and adjective tag, both tag
    collections, and the value of ``lookup.mean`` for the tagged text.
    As a side effect, each entity is stored in the module-level
    ``tag_dict`` as ``"ENT"`` and each adjective as ``"ADJ"``.
    """
    parser = MyParser(url)
    text = parser.bpArtGetText()
    print(text)
    pos_tagged = semantics.text_to_pos_tags(text)
    entities = semantics.entities(pos_tagged, "PER", "ORG", "LOC", "DAT", "FAC", "GPE")
    entities = semantics.ordered_set_of_tags(entities)
    adjectives = semantics.adjectives(pos_tagged)
    adjectives = semantics.ordered_set_of_tags(adjectives)
    # `entities and adjectives` evaluates to a single operand, so looping
    # over it printed at most one collection; walk both explicitly.
    for tags in (entities, adjectives):
        for tag in tags:
            print(tag)
    for ent in entities:
        tag_dict[ent] = "ENT"
    print(entities)
    for adj in adjectives:
        tag_dict[adj] = "ADJ"
    print(adjectives)
    mean_arousal = lookup.mean(pos_tagged)
    print(mean_arousal)
def main(url):
    """Tag the article text obtained for *url* and print the results.

    Prints the article text, every entity and adjective tag, both tag
    collections, and the value of ``lookup.mean`` for the tagged text.
    As a side effect, each entity is stored in the module-level
    ``tag_dict`` as ``"ENT"`` and each adjective as ``"ADJ"``.
    """
    parser = MyParser(url)
    text = parser.bpArtGetText()
    print(text)
    pos_tagged = semantics.text_to_pos_tags(text)
    entities = semantics.entities(pos_tagged, "PER", "ORG", "LOC", "DAT",
                                  "FAC", "GPE")
    entities = semantics.ordered_set_of_tags(entities)
    adjectives = semantics.adjectives(pos_tagged)
    adjectives = semantics.ordered_set_of_tags(adjectives)
    # `entities and adjectives` evaluates to a single operand, so looping
    # over it printed at most one collection; walk both explicitly.
    for tags in (entities, adjectives):
        for tag in tags:
            print(tag)
    for ent in entities:
        tag_dict[ent] = "ENT"
    print(entities)
    for adj in adjectives:
        tag_dict[adj] = "ADJ"
    print(adjectives)
    mean_arousal = lookup.mean(pos_tagged)
    print(mean_arousal)
Example #7
0
    while True:
        # prompt user for input file name
        input_file = input(
            'Input file name [Press \'Enter\' to user default - input.txt]: ')

        # a blank answer selects the default file
        if input_file == '':
            input_file = 'input.txt'

        # leave the prompt loop only once the named file exists
        if os.path.isfile(input_file):
            break
        print('ImportError: could not find file', input_file)

    print('Found file', input_file)

    # read every line; readlines() keeps the trailing '\n' on each line
    with open(input_file, 'r') as open_file:
        lines = open_file.readlines()

    # parse the text to generate stack code
    parser = MyParser(print_tree, time_parse)
    output = parser.parse(lines)

    # a None result from parse() prints nothing further here
    if output is not None:
        print('Finished parsing with no errors.')
        # print output
        print('\nPrinting generated output:')
        for node in output:
            print(node)
Example #8
0
class MainWindow(QMainWindow):
    """Main window: analyses a grid of words and can guess the answer row.

    The user supplies a block of text (one row per line, words separated by
    commas) plus the number of rows and words per row. Each word is looked up
    through ``MyParser``; the result is either shown in an ``AnswerWindow``
    or, in "predict" mode, reduced to a single answer letter.

    The Ukrainian identifiers map to grammar terms: ``rid`` = gender,
    ``chislo`` = number, ``vidminok`` = case, ``vidmina`` = declension.
    """

    # Lookup URL template; the word is substituted for ``{}``.
    _WORD_URL = "https://slovnyk.ua/index.php?swrd={}"
    # Marker stored for a word whose lookup failed ("word is not in the database").
    _NOT_FOUND = "Слова немає в базі"
    # Prefix of the prediction message ("I think the answer is ").
    _ANSWER_PREFIX = 'Я думаю відповідь '
    # Shown when no analysis kind is selected ("choose what you want to find").
    _NOTHING_SELECTED = "Виберіть що хочете знайти"

    def __init__(self):
        """Build the generated UI, create helper objects and connect signals."""
        super(MainWindow, self).__init__()
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        self.setWindowTitle('Програма для морфологічного аналізу слів, та вирішування тестів')
        self.ui.pushButton_solve.clicked.connect(self.solve)
        self.parser_obj = MyParser()
        self.another = Another()
        self.ab = About()
        self.settings = Settings()
        self.ui.action.triggered.connect(self.another.instruction)
        self.ui.action_about.triggered.connect(self.ab.about)
        self.ui.action_3.triggered.connect(self.settings.show_set)
        self.settings.ui.checkBox.clicked.connect(self.hide_predict)

        self.show_message('Програма не гарантує 100% правильність відповіді')

    def hide_predict(self):
        """Hide the predict checkbox and its group box."""
        self.ui.checkBox_predict.hide()
        self.ui.groupBox_predict.hide()

    def solve(self):
        """Read the inputs, run the selected analysis and prepare the export.

        Raises:
            ValueError: if either number field does not contain an integer.
        """
        self.ROW = int(self.ui.lineEdit_number_of_row.text())
        self.WORDS = int(self.ui.lineEdit_number_of_words.text())
        self.answer = AnswerWindow(self.ROW, self.WORDS)
        text = self.ui.plainTextEdit.toPlainText()
        massive = self.make_2d_massive(text)
        self.answer.my_masive = massive
        self.check_parameters(massive)
        # NOTE(review): global_save is only assigned by the find_*/predict_*
        # methods, so it is unset when no analysis kind was selected, and the
        # export actions gain another connection on every call - confirm both.
        self.export = FileExport(self.global_save, self.ROW, self.WORDS)
        self.ui.actionTXT.triggered.connect(self.export.txt)
        self.ui.actionJSON.triggered.connect(self.export.json)

    def check_parameters(self, massive):
        """Dispatch to the analysis chosen by the radio buttons.

        With the predict checkbox ticked, the chosen analysis is reduced to an
        answer letter and shown as a message; otherwise the raw analysis is
        shown in the answer window. If nothing applicable is selected, a
        prompt message is shown instead.
        """
        ui = self.ui
        if ui.checkBox_predict.isChecked():
            if ui.radioButton_rid.isChecked():
                guess = self.predict_by_rid(
                    self.find_rid(massive),
                    woman=ui.checkBox_2.isChecked(),
                    man=ui.checkBox_3.isChecked(),
                    ser=ui.checkBox_4.isChecked())
            elif ui.radioButton_chislo.isChecked():
                guess = self.predict_by_chislo(
                    self.find_chislo(self.find_vidminok(massive), massive),
                    mnog=ui.radioButton_5.isChecked(),
                    odni=ui.radioButton_6.isChecked())
            elif ui.radioButton_vidmina.isChecked():
                guess = self.predict_by_vidmina(
                    self.find_vidmina(massive),
                    a0=ui.checkBox_vidm_1.isChecked(),
                    a1=ui.checkBox_vidm_2.isChecked(),
                    a2=ui.checkBox_vidm_3.isChecked(),
                    a3=ui.checkBox_vidm_4.isChecked())
            else:
                self.show_message(self._NOTHING_SELECTED)
                return
            self.show_message(self._ANSWER_PREFIX + guess)
        elif ui.radioButton_rid.isChecked():
            self.answer.show_text(self.find_rid(massive))
        elif ui.radioButton_chislo.isChecked():
            self.answer.show_text(
                self.find_chislo(self.find_vidminok(massive), massive))
        elif ui.radioButton_vidminok.isChecked():
            self.answer.show_table(self.find_vidminok(massive))
        elif ui.radioButton_vidmina.isChecked():
            self.answer.show_text(self.find_vidmina(massive))
        else:
            self.show_message(self._NOTHING_SELECTED)

    def show_message(self, answ):
        """Put ``str(answ)`` into the message widget and show it."""
        self.ui.msg.setText(str(answ))
        self.ui.msg.show()

    def make_2d_massive(self, PText):
        """Split the raw text into a rows-by-words grid of strings.

        Each line becomes a row split on commas. The first character of every
        row's first cell is dropped (the leading upper-case letter), then the
        first character of every cell inside the ``ROW`` x ``WORDS`` grid is
        dropped (the leading space).

        Raises:
            IndexError: if the text has fewer rows or words than configured.
        """
        rows = [line.split(",") for line in PText.split('\n')]
        for row in rows:
            row[0] = row[0][1:]
        for i in range(self.ROW):
            for j in range(self.WORDS):
                rows[i][j] = rows[i][j][1:]
        return rows

    # --------------------------- find block ----------------------------------
    def _fetch_html(self, word):
        """Return the lookup page for *word* as fetched by the parser object."""
        return self.parser_obj.get_html(self._WORD_URL.format(word))

    def find_rid(self, massive):
        """Return a grid with the gender string of every word.

        A word whose lookup or parsing fails gets the not-found marker. The
        grid is also stored in ``self.global_save``.
        """
        res = []
        for i in range(self.ROW):
            row = []
            for j in range(self.WORDS):
                # Any lookup/parsing failure is reported as "not found";
                # Exception (not a bare except) lets Ctrl-C and exit through.
                try:
                    raw = self.parser_obj.parse_word_data(
                        self._fetch_html(massive[i][j]))[1]
                    # Keep the text between the first character and the
                    # last four, exactly as the parsed field is trimmed.
                    row.append(raw[1:len(raw) - 4])
                except Exception:
                    row.append(self._NOT_FOUND)
            res.append(row)
        self.global_save = res
        return res

    def find_vidminok(self, massive):
        """Return a grid with the case table of every word.

        A failed lookup yields seven placeholder rows of
        ``[not-found marker, "X", "X"]``. The grid is also stored in
        ``self.global_save``.
        """
        res = []
        for i in range(self.ROW):
            row = []
            for j in range(self.WORDS):
                try:
                    row.append(self.parser_obj.parse_word_vidminok(
                        self._fetch_html(massive[i][j])))
                except Exception:
                    row.append([[self._NOT_FOUND, "X", "X"] for _ in range(7)])
            res.append(row)
        self.global_save = res
        return res

    def find_chislo(self, massive, massive2):
        """Return a grid saying whether each word is singular or plural.

        Args:
            massive: grid of case tables as produced by ``find_vidminok``.
            massive2: grid of the words themselves.

        A word found in the second column of its table is "Однина"
        (singular); otherwise, if found in the last column, "Множина"
        (plural); otherwise the not-found marker. The grid is also stored in
        ``self.global_save``.
        """
        res = []
        for i in range(self.ROW):
            row = []
            for j in range(self.WORDS):
                table = massive[i][j]
                word = massive2[i][j]
                # Forms are collected per word. The lists used to be shared
                # by the whole grid, so a word could match the forms of any
                # word processed before it.
                singular = [forms[1] for forms in table]
                plural = [forms[-1] for forms in table]
                if word in singular:
                    row.append("Однина")
                elif word in plural:
                    row.append("Множина")
                else:
                    row.append(self._NOT_FOUND)
            res.append(row)
        self.global_save = res
        return res

    def find_vidmina(self, massive):
        """Return a grid with the declension ("N відміна") of every word.

        The declension is derived from the word's gender and its case table:
        masculine/feminine words ending in 'а' or 'я' in the first table row
        are 1st declension, other masculine words 2nd, other feminine words
        3rd; neuter words are 4th if any of the seven second-column forms
        contains 'ат', 'ят' or 'ен', else 2nd. Any other gender value gives
        "X". The grid is also stored in ``self.global_save``.
        """
        vidminok = self.find_vidminok(massive)
        rid = self.find_rid(massive)
        res = []
        for i in range(self.ROW):
            row = []
            for j in range(self.WORDS):
                table = vidminok[i][j]
                gender = rid[i][j]
                if gender in ('чоловічий', 'жіночий'):
                    if table[0][1][-1] in ('а', 'я'):
                        row.append("1 відміна")
                    elif gender == 'чоловічий':
                        row.append("2 відміна")
                    else:
                        row.append("3 відміна")
                elif gender == 'середній':
                    has_suffix = any(
                        marker in table[bn][1]
                        for bn in range(7)
                        for marker in ('ат', 'ят', 'ен'))
                    row.append("4 відміна" if has_suffix else "2 відміна")
                else:
                    row.append("X")
            res.append(row)
        self.global_save = res
        return res

    # --------------------------- predict block -------------------------------
    def _pick_row(self, data, wanted):
        """Return the answer letter of the row with the most cells in *wanted*.

        Ties go to the first such row. The letter is also stored in
        ``self.global_save``.
        """
        counts = [
            sum(1 for j in range(self.WORDS) if data[i][j] in wanted)
            for i in range(self.ROW)
        ]
        answer = ANSWER_LETTER[counts.index(max(counts))]
        self.global_save = answer
        return answer

    def predict_by_rid(self, data, **params):
        """Guess the answer row by gender.

        Args:
            data: grid produced by ``find_rid``.
            **params: booleans ``woman``, ``man`` and ``ser`` selecting the
                feminine, masculine and neuter genders.
        """
        wanted = []
        if params['woman']:
            wanted.append("жіночий")
        if params['man']:
            wanted.append("чоловічий")
        if params['ser']:
            wanted.append("середній")
        # Feminine and masculine together also accept the combined label.
        if wanted[:2] == ["жіночий", "чоловічий"]:
            wanted.append("чоловічий і жіночий")
        return self._pick_row(data, wanted)

    def predict_by_chislo(self, data, **params):
        """Guess the answer row by number.

        Args:
            data: grid produced by ``find_chislo``.
            **params: booleans ``mnog`` (plural) and ``odni`` (singular).
        """
        wanted = []
        if params['mnog']:
            wanted.append("Множина")
        if params['odni']:
            wanted.append("Однина")
        return self._pick_row(data, wanted)

    def predict_by_vidmina(self, data, **params):
        """Guess the answer row by declension.

        Args:
            data: grid produced by ``find_vidmina``.
            **params: booleans ``a0``..``a3`` selecting declensions 1..4.
        """
        wanted = [
            str(k + 1) + " відміна" for k in range(4) if params["a" + str(k)]
        ]
        return self._pick_row(data, wanted)
Example #9
0
class Tests(unittest.TestCase):
    """Checks ``MyParser.check_options`` on accepted and rejected argument lists."""

    def setUp(self):
        self.parser = MyParser()

    def _assert_rejected(self, argv, message):
        """Assert that checking *argv* raises MyParserError whose text is *message*."""
        with self.assertRaises(MyParserError) as context:
            self.parser.check_options(argv)
        self.assertEqual(message, str(context.exception))

    def test_one_option_defined(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        parsed = self.parser.check_options(['./test', '--key=123'])
        self.assertEqual({'--key': '123'}, parsed)

    def test_two_options_defined(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        self.parser.add_option('--name', dtype='str', is_flag=True)
        argv = ['./test', '--key=12345', '--name=kaustubh']
        parsed = self.parser.check_options(argv)
        self.assertEqual({'--key': '12345', '--name': 'kaustubh'}, parsed)

    def test_three_options_defined(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        self.parser.add_option('--local', dtype='str', is_flag=False)
        self.parser.add_option('--remote', dtype='str', is_flag=False)
        argv = ['./test', '--key=19', '--local', '--remote']
        parsed = self.parser.check_options(argv)
        expected = {'--key': '19', '--local': 'True', '--remote': 'True'}
        self.assertEqual(expected, parsed)

    def test_option_with_invalid_datatype(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        self._assert_rejected(['./test', '--key=cat'],
                              'The field has invalid value.')

    def test_unexpexted_option(self):
        self.parser.add_option('--local', dtype='str', is_flag=False)
        self._assert_rejected(['./test', '--local', '--remote'],
                              "Unexpected field given.")

    def test_too_less_arguments_given(self):
        self.parser.add_option('--age', dtype='int', is_flag=True)
        self._assert_rejected(['./test', '--age'], 'Too less arguments.')

    def test_too_many_arguments_given(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        self.parser.add_option('--local', dtype='str', is_flag=False)
        self._assert_rejected(['./test', '--key=19', '--local=abc'],
                              'Too many arguments.')

    def test_no_options_defined(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        self._assert_rejected(['./test'], 'No options given.')
Example #10
0
 def setUp(self):
     # Store a newly constructed MyParser on the instance as ``self.parser``.
     self.parser = MyParser()
import logging
from my_parser import MyParser
from aro_lookup import AroLookup
from api_calls import tagSearch
import semantics
import sys

# Sample inputs: a genre-term list URL, a news article URL and a sentence.
# Only testUrl is read by the statements below.
genresUrl = 'http://labrosa.ee.columbia.edu/millionsong/sites/default/files/AdditionalFiles/unique_terms.txt'
testUrl = 'http://www.bbc.com/news/technology-31552029'
testSentence = "This is an ultimate, to beat Chelsea, who I think will go on and win the Champion's League - it really is."

# Module-level helper objects and logging setup (timestamp : level : message).
lookup = AroLookup()
tag_dict = {}
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# NOTE(review): these statements run whenever the module is loaded, not only
# when it is executed as a script - confirm that is intended.
# Parse testUrl, print its text, then print the extracted entities twice:
# raw and after ordered_set_of_tags().
parser = MyParser(testUrl)
text = parser.bpArtGetText()
print(text)
pos_tagged = semantics.text_to_pos_tags(text)
entities = semantics.entities(pos_tagged, "PER", "ORG", "LOC", "DAT", "FAC", "GPE")
print(entities)
print(semantics.ordered_set_of_tags(entities))

def main(url):
    """Print the article text obtained for *url* and compute its tags.

    The text is POS-tagged, entities of the listed kinds are extracted and
    passed through ``ordered_set_of_tags``, and adjectives are extracted.
    Nothing is returned.
    """
    article = MyParser(url)
    body = article.bpArtGetText()
    print(body)
    tagged = semantics.text_to_pos_tags(body)
    wanted_kinds = ("PER", "ORG", "LOC", "DAT", "FAC", "GPE")
    entities = semantics.ordered_set_of_tags(
        semantics.entities(tagged, *wanted_kinds))
    # NOTE(review): neither result is used after this point in the visible
    # body - the function looks cut short; confirm against the full file.
    adjectives = semantics.adjectives(tagged)
Example #12
0
### To Add:
### - Telephone number search
### - import pdf
### - output resulting data to json or similar
### - add functionality to run the script in a different folder

from filefinder import FileFinder
from my_parser import MyParser

# Ask FileFinder for the files to process; get_files() yields a pair that is
# unpacked into the file list and a flag.
files = FileFinder()
list_of_files, flag = files.get_files()
# Hand both values to MyParser, then call its write_json_file() method.
parsed_files = MyParser(list_of_files, flag)
parsed_files.write_json_file()
Example #13
0
class Crawler:
    """Downloads topics page by page through an authenticated session.

    The crawler walks topic ids starting from the cached ``last_id`` (or
    ``crawl_start_id()``), hands every downloaded page to the parser and
    saves the parsed messages through the file driver.
    """

    def __init__(self):
        """Create the parser, URLs, session, cache and file driver.

        The starting URL id is the cache's ``last_id`` converted to ``int``
        when that value is truthy; otherwise it is ``crawl_start_id()``.
        """
        self.parser = MyParser()
        self.__base_url = base_url()
        self.__login_url = login_url()
        self.__failures = 0
        # open_session() reads self.parser and self.__login_url set above.
        self.__session = self.open_session()
        self.cache = Cache()
        self.file_io_driver = FileIODriver()
        self.current_url_id = int(
            self.cache.last_id) if self.cache.last_id else crawl_start_id()
        self.current_url = ''

    def break_data_load(self) -> bool:
        """Return True when the failure counter equals ``max_attempts()``."""
        return self.__failures == max_attempts()

    def open_session(self):
        """Return a ``requests`` session after posting the login form.

        The login page is fetched first so that the parser can extract the
        ``execution`` field, which is posted with the authorization data.
        """
        session = requests.Session()
        session.headers.update({
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:57.0) Gecko/20100101 Firefox/57.0'
        })

        auth_data = authorization_data()
        page = session.get(self.__login_url).text
        auth_data['execution'] = self.parser.execution_data(page)
        session.post(self.__login_url, data=auth_data)

        return session

    def load_topic(self, page):
        """Parse and save every page of the current topic, then advance.

        Args:
            page: response for the topic's first page.

        Pages are requested with increasing ``page`` indices until a
        response contains the ``no_next_page_found()`` marker; that last
        response is passed to the parser to obtain the next topic id.
        """
        page_index = 1
        while True:
            self.parser.parse_page(page, self)
            self.file_io_driver.save_messages(self.parser)
            # Use the base URL cached in __init__, as load_data() does; the
            # original went through ``settings.base_url()``, a module name
            # nothing else in this class refers to.
            next_page_url_of_same_topic = self.__base_url + str(
                self.current_url_id) + '?page=' + str(page_index)
            page = self.__session.get(next_page_url_of_same_topic)
            if no_next_page_found() in page.text:
                break
            print(Fore.BLUE + 'Найдена новая страница темы')
            page_index += 1
        self.current_url_id = self.parser.next_url_id(page)

    def load_data(self):
        """Download topics until there is no next id or failures hit the limit.

        A missing page increments the failure counter and the id; a found
        page resets the counter. The loop's ``else`` branch reports success
        only when the loop ended without ``break``.
        """
        while self.current_url_id:
            # page=0 is the first page of a topic; pageSize=Size5 means
            # 50 messages per page, the maximum batch.
            full_url = self.__base_url + str(
                self.current_url_id) + '?page=0&pageSize=Size5'
            self.cache.last_id = self.current_url_id
            page = self.__session.get(full_url)
            if no_page_found() in page.text:
                print(Fore.RED + 'Страница не найдена')
                self.__failures += 1
                # Unexpected: this algorithm should never produce dead
                # links. If one is hit anyway, probe the following ids one
                # by one until a working page is found.
                self.current_url_id += 1
                sleep(sleep_timer())
            else:
                print(Fore.WHITE + 'Скачана страница -->',
                      Fore.GREEN + str(self.current_url_id))
                self.load_topic(page)
                self.__failures = 0
            if self.break_data_load():
                print(
                    Fore.YELLOW +
                    'Достигнуто максимальное количество попыток. Работа завершена id',
                    str(self.current_url_id))
                break
        else:
            print(Fore.GREEN + 'Работа успешно завершена')

    def save_data(self):
        """Save the cache and write the parser's messages through the file driver."""
        self.cache.save()
        self.file_io_driver.save_messages(self.parser)
import logging
import sys

import semantics
from api_calls import tagSearch
from aro_lookup import AroLookup
from my_parser import MyParser

# Sample inputs: a genre-term list URL, a news article URL and a sentence.
# Only testUrl is read by the statements below.
genresUrl = 'http://labrosa.ee.columbia.edu/millionsong/sites/default/files/AdditionalFiles/unique_terms.txt'
testUrl = 'http://www.bbc.com/news/technology-31552029'
testSentence = "This is an ultimate, to beat Chelsea, who I think will go on and win the Champion's League - it really is."

# Module-level helper objects and logging setup (timestamp : level : message).
lookup = AroLookup()
tag_dict = {}
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

# NOTE(review): these statements run whenever the module is loaded, not only
# when it is executed as a script - confirm that is intended.
# Parse testUrl, print its text, then print the extracted entities twice:
# raw and after ordered_set_of_tags().
parser = MyParser(testUrl)
text = parser.bpArtGetText()
print(text)
pos_tagged = semantics.text_to_pos_tags(text)
entities = semantics.entities(pos_tagged, "PER", "ORG", "LOC", "DAT", "FAC",
                              "GPE")
print(entities)
print(semantics.ordered_set_of_tags(entities))


def main(url):
    parser = MyParser(url)
    text = parser.bpArtGetText()
    print(text)
    pos_tagged = semantics.text_to_pos_tags(text)
    entities = semantics.entities(pos_tagged, "PER", "ORG", "LOC", "DAT",