def main():
    # Get the command-line arguments
    args = sys.argv
    # Function definitions
    funcs = args[1]
    # Constant definitions
    defines = args[2]
    # Input file
    infile = args[3]
    # Output file
    outfile = args[4]
    # Run lexical analysis
    parser = MyParser(Lexer(LexerReader(infile)), funcs, defines)
    # Run syntax analysis
    parser.parse(outfile)
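
# A hypothetical invocation sketch, inferred only from the argument indices read
# above (the real script name is not shown and is a placeholder here):
#   python main.py <funcs> <defines> <infile> <outfile>
if __name__ == '__main__':
    main()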

def main(url):
    parser = MyParser(url)
    text = parser.bpArtGetText()
    print(text)
    pos_tagged = semantics.text_to_pos_tags(text)
    entities = semantics.entities(pos_tagged, "PER", "ORG", "LOC", "DAT", "FAC", "GPE")
    entities = semantics.ordered_set_of_tags(entities)
    adjectives = semantics.adjectives(pos_tagged)
    adjectives = semantics.ordered_set_of_tags(adjectives)
    # Print every entity and adjective tag. The original "entities and adjectives"
    # expression only iterated over adjectives, so both collections are chained here.
    for tag in list(entities) + list(adjectives):
        print(tag)
    for ent in entities:
        tag_dict[ent] = "ENT"
    print(entities)
    for adj in adjectives:
        tag_dict[adj] = "ADJ"
    print(adjectives)
    mean_arousal = lookup.mean(pos_tagged)
    print(mean_arousal)

while True:
    # Prompt the user for the input file name
    input_file = input('Input file name [Press \'Enter\' to use default - input.txt]: ')
    # If blank, fall back to the default
    if input_file == '':
        input_file = 'input.txt'
    if os.path.isfile(input_file):
        break
    else:
        print('ImportError: could not find file', input_file)

print('Found file', input_file)

# Open the file and read all lines
with open(input_file, 'r') as open_file:
    lines = open_file.readlines()

# Parse the text to generate stack code
parser = MyParser(print_tree, time_parse)
output = parser.parse(lines)
if output is not None:
    print('Finished parsing with no errors.')
    # Print the generated output
    print('\nPrinting generated output:')
    for node in output:
        print(node)

class MainWindow(QMainWindow):
    def __init__(self):
        super(MainWindow, self).__init__()
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        self.setWindowTitle('Програма для морфологічного аналізу слів, та вирішування тестів')
        self.ui.pushButton_solve.clicked.connect(self.solve)
        self.parser_obj = MyParser()
        self.another = Another()
        self.ab = About()
        self.settings = Settings()
        self.ui.action.triggered.connect(self.another.instruction)
        self.ui.action_about.triggered.connect(self.ab.about)
        self.ui.action_3.triggered.connect(self.settings.show_set)
        self.settings.ui.checkBox.clicked.connect(self.hide_predict)
        self.show_message('Програма не гарантує 100% правильність відповіді')

    def hide_predict(self):
        self.ui.checkBox_predict.hide()
        self.ui.groupBox_predict.hide()

    def solve(self):
        self.ROW = int(self.ui.lineEdit_number_of_row.text())
        self.WORDS = int(self.ui.lineEdit_number_of_words.text())
        self.answer = AnswerWindow(self.ROW, self.WORDS)
        text = self.ui.plainTextEdit.toPlainText()
        massive = self.make_2d_massive(text)
        self.answer.my_masive = massive
        self.check_parameters(massive)
        self.export = FileExport(self.global_save, self.ROW, self.WORDS)
        self.ui.actionTXT.triggered.connect(self.export.txt)
        self.ui.actionJSON.triggered.connect(self.export.json)

    def check_parameters(self, massive):
        if self.ui.checkBox_predict.isChecked():
            if self.ui.radioButton_rid.isChecked():
                msg = 'Я думаю відповідь '
                self.show_message(msg + self.predict_by_rid(
                    self.find_rid(massive),
                    woman=self.ui.checkBox_2.isChecked(),
                    man=self.ui.checkBox_3.isChecked(),
                    ser=self.ui.checkBox_4.isChecked()))
            elif self.ui.radioButton_chislo.isChecked():
                msg = 'Я думаю відповідь '
                self.show_message(msg + self.predict_by_chislo(
                    self.find_chislo(self.find_vidminok(massive), massive),
                    mnog=self.ui.radioButton_5.isChecked(),
                    odni=self.ui.radioButton_6.isChecked()))
            elif self.ui.radioButton_vidmina.isChecked():
                msg = 'Я думаю відповідь '
                self.show_message(msg + self.predict_by_vidmina(
                    self.find_vidmina(massive),
                    a0=self.ui.checkBox_vidm_1.isChecked(),
                    a1=self.ui.checkBox_vidm_2.isChecked(),
                    a2=self.ui.checkBox_vidm_3.isChecked(),
                    a3=self.ui.checkBox_vidm_4.isChecked()))
            else:
                self.show_message("Виберіть що хочете знайти")
        else:
            if self.ui.radioButton_rid.isChecked():
                self.answer.show_text(self.find_rid(massive))
            elif self.ui.radioButton_chislo.isChecked():
                self.answer.show_text(self.find_chislo(self.find_vidminok(massive), massive))
            elif self.ui.radioButton_vidminok.isChecked():
                self.answer.show_table(self.find_vidminok(massive))
            elif self.ui.radioButton_vidmina.isChecked():
                self.answer.show_text(self.find_vidmina(massive))
            else:
                self.show_message("Виберіть що хочете знайти")

    def show_message(self, answ):
        # print answer
        self.ui.msg.setText(str(answ))
        self.ui.msg.show()

    def make_2d_massive(self, PText):
        txt = PText.split('\n')
        l = []
        for n in range(len(txt)):
            # make 2d massive
            l.append(txt[n].split(","))
        for k in range(len(txt)):
            # delete the leading answer letter
            text = l[k][0]
            text = text[1:]
            l[k][0] = text
        for i in range(self.ROW):
            # delete the leading space of each word
            for j in range(self.WORDS):
                text = l[i][j]
                text = text[1:]
                l[i][j] = text
        return l

    # --------------------------- find block ---------------------------------------

    def find_rid(self, massive):
        # Query slovnyk.ua for each word and extract its grammatical gender.
        res = []
        for i in range(self.ROW):
            n = []
            for j in range(self.WORDS):
                try:
                    html = self.parser_obj.get_html("https://slovnyk.ua/index.php?swrd={}".format(massive[i][j]))
                    tmp = self.parser_obj.parse_word_data(html)[1]
                    tmp = tmp[1:-4]
                    n.append(tmp)
                except Exception:
                    n.append("Слова немає в базі")
            res.append(n)
        self.global_save = res
        return res

    def find_vidminok(self, massive):
        # Query slovnyk.ua for each word and collect its case (відмінок) table.
        res = []
        for i in range(self.ROW):
            n = []
            for j in range(self.WORDS):
                try:
                    html = self.parser_obj.get_html("https://slovnyk.ua/index.php?swrd={}".format(massive[i][j]))
                    n.append(self.parser_obj.parse_word_vidminok(html))
                except Exception:
                    mb = []
                    for b in range(7):
                        mb.append(["Слова немає в базі", "X", "X"])
                    n.append(mb)
            res.append(n)
        self.global_save = res
        return res

    def find_chislo(self, massive, massive2):
        # Decide whether each word is singular or plural from its case table.
        odnina = []
        mnogina = []
        res = []
        for i in range(self.ROW):
            n = []
            for j in range(self.WORDS):
                d = massive[i][j]
                wrd = massive2[i][j]
                for k in range(len(d)):
                    odnina.append(d[k][1])
                    mnogina.append(d[k][-1])
                if wrd in odnina:
                    n.append("Однина")
                elif wrd in mnogina:
                    n.append("Множина")
                else:
                    n.append("Слова немає в базі")
            res.append(n)
        self.global_save = res
        return res

    def find_vidmina(self, massive):
        # Derive the declension group from the gender and the case table.
        vidminok = self.find_vidminok(massive)
        rid = self.find_rid(massive)
        res = []
        for i in range(self.ROW):
            n = []
            for j in range(self.WORDS):
                d = vidminok[i][j]
                if rid[i][j] == 'чоловічий':
                    if d[0][1][-1] == 'а':
                        n.append("1 відміна")
                    elif d[0][1][-1] == 'я':
                        n.append("1 відміна")
                    else:
                        n.append("2 відміна")
                elif rid[i][j] == 'жіночий':
                    if d[0][1][-1] == 'а':
                        n.append("1 відміна")
                    elif d[0][1][-1] == 'я':
                        n.append("1 відміна")
                    else:
                        n.append("3 відміна")
                elif rid[i][j] == 'середній':
                    count = 0
                    for bn in range(7):
                        if 'ат' in d[bn][1]:
                            count += 1
                        elif 'ят' in d[bn][1]:
                            count += 1
                        elif 'ен' in d[bn][1]:
                            count += 1
                    if count:
                        n.append("4 відміна")
                    else:
                        n.append("2 відміна")
                else:
                    n.append("X")
            res.append(n)
        self.global_save = res
        return res

    # --------------------------- predict block ------------------------------------

    def predict_by_rid(self, data, **params):
        # Count per row how many words match the selected genders and
        # return the answer letter of the best-matching row.
        l = []
        if params['woman']:
            l.append("жіночий")
        if params['man']:
            l.append("чоловічий")
        if params['ser']:
            l.append("середній")
        if len(l) > 1:
            if l[0] == 'жіночий':
                if l[1] == 'чоловічий':
                    l.append("чоловічий і жіночий")
        res = []
        for i in range(self.ROW):
            count = 0
            for j in range(self.WORDS):
                if data[i][j] in l:
                    count += 1
            res.append(count)
        answer = ANSWER_LETTER[res.index(max(res))]
        self.global_save = answer
        return answer

    def predict_by_chislo(self, data, **params):
        l = []
        if params['mnog']:
            l.append("Множина")
        if params['odni']:
            l.append("Однина")
        res = []
        for i in range(self.ROW):
            count = 0
            for j in range(self.WORDS):
                if data[i][j] in l:
                    count += 1
            res.append(count)
        answer = ANSWER_LETTER[res.index(max(res))]
        self.global_save = answer
        return answer

    def predict_by_vidmina(self, data, **params):
        l = []
        for k in range(4):
            if params["a" + str(k)]:
                l.append(str(k + 1) + " відміна")
        res = []
        for i in range(self.ROW):
            count = 0
            for j in range(self.WORDS):
                if data[i][j] in l:
                    count += 1
            res.append(count)
        answer = ANSWER_LETTER[res.index(max(res))]
        self.global_save = answer
        return answer
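
# A minimal, hypothetical Qt bootstrap for the MainWindow above. The imports assume
# PyQt5; the real project's entry point and UI module are not shown here.
import sys
from PyQt5.QtWidgets import QApplication

if __name__ == '__main__':
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())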

class Tests(unittest.TestCase):
    """Test cases for MyParser option handling."""

    def setUp(self):
        self.parser = MyParser()

    def test_one_option_defined(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        ans = self.parser.check_options(['./test', '--key=123'])
        self.assertEqual({'--key': '123'}, ans)

    def test_two_options_defined(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        self.parser.add_option('--name', dtype='str', is_flag=True)
        ans = self.parser.check_options(['./test', '--key=12345', '--name=kaustubh'])
        self.assertEqual({'--key': '12345', '--name': 'kaustubh'}, ans)

    def test_three_options_defined(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        self.parser.add_option('--local', dtype='str', is_flag=False)
        self.parser.add_option('--remote', dtype='str', is_flag=False)
        ans = self.parser.check_options(['./test', '--key=19', '--local', '--remote'])
        self.assertEqual({'--key': '19', '--local': 'True', '--remote': 'True'}, ans)

    def test_option_with_invalid_datatype(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        with self.assertRaises(MyParserError) as context:
            self.parser.check_options(['./test', '--key=cat'])
        self.assertEqual('The field has invalid value.', str(context.exception))

    def test_unexpected_option(self):
        self.parser.add_option('--local', dtype='str', is_flag=False)
        with self.assertRaises(MyParserError) as context:
            self.parser.check_options(['./test', '--local', '--remote'])
        self.assertEqual("Unexpected field given.", str(context.exception))

    def test_too_less_arguments_given(self):
        self.parser.add_option('--age', dtype='int', is_flag=True)
        with self.assertRaises(MyParserError) as context:
            self.parser.check_options(['./test', '--age'])
        self.assertEqual('Too less arguments.', str(context.exception))

    def test_too_many_arguments_given(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        self.parser.add_option('--local', dtype='str', is_flag=False)
        with self.assertRaises(MyParserError) as context:
            self.parser.check_options(['./test', '--key=19', '--local=abc'])
        self.assertEqual('Too many arguments.', str(context.exception))

    def test_no_options_defined(self):
        self.parser.add_option('--key', dtype='int', is_flag=True)
        with self.assertRaises(MyParserError) as context:
            self.parser.check_options(['./test'])
        self.assertEqual('No options given.', str(context.exception))
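
# The my_parser module under test is not shown, so the following is only a minimal,
# hypothetical sketch of a MyParser/MyParserError pair that would satisfy the
# assertions above; the real implementation may differ.
class MyParserError(Exception):
    """Raised by check_options() on invalid input (assumed)."""


class MyParser:
    def __init__(self):
        self._options = {}  # option name -> (dtype, is_flag)

    def add_option(self, name, dtype='str', is_flag=False):
        self._options[name] = (dtype, is_flag)

    def check_options(self, argv):
        args = argv[1:]  # drop the program name (e.g. './test')
        if not args:
            raise MyParserError('No options given.')
        result = {}
        for arg in args:
            name, _, value = arg.partition('=')
            if name not in self._options:
                raise MyParserError('Unexpected field given.')
            dtype, is_flag = self._options[name]
            if is_flag:
                # Options declared with is_flag=True expect '--name=value'.
                if not value:
                    raise MyParserError('Too less arguments.')
                if dtype == 'int' and not value.isdigit():
                    raise MyParserError('The field has invalid value.')
                result[name] = value
            else:
                # Bare options only record their presence.
                if value:
                    raise MyParserError('Too many arguments.')
                result[name] = 'True'
        return result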

import logging
from my_parser import MyParser
from aro_lookup import AroLookup
from api_calls import tagSearch
import semantics
import sys

genresUrl = 'http://labrosa.ee.columbia.edu/millionsong/sites/default/files/AdditionalFiles/unique_terms.txt'
testUrl = 'http://www.bbc.com/news/technology-31552029'
testSentence = "This is an ultimate, to beat Chelsea, who I think will go on and win the Champion's League - it really is."

lookup = AroLookup()
tag_dict = {}

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

parser = MyParser(testUrl)
text = parser.bpArtGetText()
print(text)
pos_tagged = semantics.text_to_pos_tags(text)
entities = semantics.entities(pos_tagged, "PER", "ORG", "LOC", "DAT", "FAC", "GPE")
print(entities)
print(semantics.ordered_set_of_tags(entities))


def main(url):
    parser = MyParser(url)
    text = parser.bpArtGetText()
    print(text)
    pos_tagged = semantics.text_to_pos_tags(text)
    entities = semantics.entities(pos_tagged, "PER", "ORG", "LOC", "DAT", "FAC", "GPE")
    entities = semantics.ordered_set_of_tags(entities)
    adjectives = semantics.adjectives(pos_tagged)

### To Add:
###  - Telephone number search
###  - import pdf
###  - output resulting data to json or similar
###  - add functionality to run the script in a different folder

from filefinder import FileFinder
from my_parser import MyParser

files = FileFinder()
list_of_files, flag = files.get_files()
parsed_files = MyParser(list_of_files, flag)
parsed_files.write_json_file()

class Crawler:
    def __init__(self):
        self.parser = MyParser()
        self.__base_url = base_url()
        self.__login_url = login_url()
        self.__failures = 0
        self.__session = self.open_session()
        self.cache = Cache()
        self.file_io_driver = FileIODriver()
        self.current_url_id = int(self.cache.last_id) if self.cache.last_id else crawl_start_id()
        self.current_url = ''

    def break_data_load(self) -> bool:
        return self.__failures == max_attempts()

    def open_session(self):
        session = requests.Session()
        session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:57.0) Gecko/20100101 Firefox/57.0'
        })
        auth_data = authorization_data()
        page = session.get(self.__login_url).text
        auth_data['execution'] = self.parser.execution_data(page)
        session.post(self.__login_url, data=auth_data)
        return session

    def load_topic(self, page):
        page_index = 1
        while True:
            self.parser.parse_page(page, self)
            self.file_io_driver.save_messages(self.parser)
            next_page_url_of_same_topic = settings.base_url() + str(self.current_url_id) + '?page=' + str(page_index)
            page = self.__session.get(next_page_url_of_same_topic)
            if no_next_page_found() in page.text:
                break
            else:
                print(Fore.BLUE + 'Найдена новая страница темы')
                page_index += 1
        self.current_url_id = self.parser.next_url_id(page)

    def load_data(self):
        while self.current_url_id:
            # page=0 is the first page of a topic; pageSize=Size5 gives 50 messages per page, the largest chunk.
            full_url = self.__base_url + str(self.current_url_id) + '?page=0&pageSize=Size5'
            self.cache.last_id = self.current_url_id
            page = self.__session.get(full_url)
            if no_page_found() in page.text:
                print(Fore.RED + 'Страница не найдена')
                self.__failures += 1
                # Strange situation: this algorithm should never produce dead links. If we did hit one,
                # look for the next working link by simply incrementing the id.
                self.current_url_id += 1
                sleep(sleep_timer())
            else:
                print(Fore.WHITE + 'Скачана страница -->', Fore.GREEN + str(self.current_url_id))
                self.load_topic(page)
                self.__failures = 0
            if self.break_data_load():
                print(Fore.YELLOW + 'Достигнуто максимальное количество попыток. Работа завершена id',
                      str(self.current_url_id))
                break
        else:
            print(Fore.GREEN + 'Работа успешно завершена')

    def save_data(self):
        self.cache.save()
        self.file_io_driver.save_messages(self.parser)
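
# A minimal, hypothetical driver for the Crawler above. Only the load_data()/save_data()
# API shown in the class is used; the real project's entry point may differ.
if __name__ == '__main__':
    crawler = Crawler()
    try:
        crawler.load_data()    # walk topic ids starting from the cached last_id
    finally:
        crawler.save_data()    # persist the cache and parsed messages even if interrupted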