def main(): """ Main method that open and close files and ckecks the belonging of word to language defined by the automata. """ in_file_name = "input.txt" out_file_name = "output.txt" in_file = open(in_file_name, 'r') out_file = open(out_file_name, 'w') out_file.write('IlgizZamaleev' + '\n') problems = int(in_file.readline()) for i in range(problems): automato = Parser().parse_automata(in_file) tests = int(in_file.readline()) out_file.write(str(i + 1) + '\n') for j in range(tests): word = in_file.readline()[:-1] out_file.write(automato.check_word(word) + '\n') in_file.close() out_file.close()
def read(img: Union[str, PicHandler], path: str = 'D:\\Project\\') -> ElemBlock:
    # Accept either a file name or an already-constructed PicHandler.
    if isinstance(img, str):
        ph = PicHandler(img, path=path)
    else:
        ph = img
    blocks = Parser.parse(ph.blocksOfPixels(), sensivity=4, merge='math')
    Parser.swap_coordinates(blocks)
    elemBlocks = [FormulaRecognizer.recBlock(block) for block in blocks]
    try:
        f = Formula(elemBlocks)
    except Exception:
        # Recognition failed: return an empty block flagged as not OK.
        return ElemBlock('', Position(0, 0, 0, 0), ok=False)
    print(f.texCode)
    # ph._show()
    return f.getFormula()
def translate(file):
    writer = CodeWriter(file)
    if '.vm' in file:
        # Single .vm file: translate it directly.
        parser = Parser(file)
        parse(parser, writer)
    else:
        # Directory: translate every .vm file it contains, Sys.vm last.
        if os.getcwd().split('/')[-1] != file:
            os.chdir(file)
        for fileName in glob.glob('*.vm'):
            if fileName != const.SYS:
                writer.setFileName(fileName)
                parser = Parser(fileName)
                parse(parser, writer)
        writer.setFileName(fileName)
        parser = Parser(const.SYS)
        parse(parser, writer)
    writer.close()
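# A minimal command-line driver for translate(), included as an assumption:
# the original module does not show its entry point, and the argument name
# sys.argv[1] (the target file or directory) is a placeholder.
if __name__ == '__main__':
    import sys
    translate(sys.argv[1])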
def main():
    parser = Parser(HTML.REDIS, PARSER.ELASTICSEARCH)
    # Crawler start
    crawler = Crawler(DB.MYSQL, HTML.REDIS, parser)
    try:
        crawler.run()
    except KeyboardInterrupt:
        crawler.stop()
        sys.exit(0)
def inline_place(bot, update):
    query = update.inline_query.query
    help_text = ('Scrivi il nome di una città per cercare solo gli eventi in quella città.\n'
                 'Puoi anche cercare in una intera regione!\n'
                 'Oppure puoi cercare nei dintorni di una città scrivendo '
                 '"dintorni" dopo il nome della città')
    if not query:
        return
    if query == 'help':
        results = [
            InlineQueryResultArticle(
                id=uuid4(),
                title=help_text,
                input_message_content=InputTextMessageContent(
                    "Un attimo, sto usando @cercaEventi_bot"))
        ]
        update.inline_query.answer(results)
        return
    p = Parser(query)
    events = p.getEvents()
    if len(events) > 0:
        results = [
            InlineQueryResultArticle(
                id=uuid4(),
                title=event['name'],
                input_message_content=InputTextMessageContent(
                    "{}\n<b>{}</b>\n<b>{}</b>\n{}".format(
                        event['name'], event['date'], event['place'], event['link']),
                    parse_mode='HTML'),
                thumb_url=event['img'],
                description="{}\n{}".format(event['date'], event['place']))
            for event in events
        ]
    else:
        results = [
            InlineQueryResultArticle(
                id=uuid4(),
                title='Nessun evento trovato per {}'.format(query),
                input_message_content=InputTextMessageContent(
                    'Nessun evento trovato per {}'.format(query)))
        ]
    update.inline_query.answer(results)
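# A minimal registration sketch for the inline handler above, assuming the
# pre-v12 python-telegram-bot API (the handler takes (bot, update)); the real
# bot token and any other handlers are not shown in the original, so `token`
# here is a placeholder parameter.
from telegram.ext import Updater, InlineQueryHandler

def run_bot(token):
    updater = Updater(token)
    updater.dispatcher.add_handler(InlineQueryHandler(inline_place))
    updater.start_polling()
    updater.idle()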
def __init__(self, version):
    self.split_version = Parser().parse(version)
from grammar import Grammar
from my_parser import Parser


def display_options():
    print("1 Display Terminals")
    print("2 Display Non-terminals")
    print("3 Display Productions")
    print("4 Choose production to do closure")
    print("5 Choose symbol to do goto on state(ClosureLR of S'->S)")
    print("6 Col Can")


if __name__ == '__main__':
    g: Grammar = Grammar('g1.in')
    p = Parser(g)
    while True:
        display_options()
        i = int(input())
        if i == 1:
            print(p.get_e())
        elif i == 2:
            print(p.get_n())
        elif i == 3:
            print(p.get_p())
        elif i == 4:
            __user_input = input("Give input:")
            print(p.closure_lr(__user_input))
        elif i == 5:
            __user_input = input("Give input:")
            result = p.go_to_lr(p.closure_lr("S'->.S"), __user_input)
            print(result)
# Per-epoch validation and logging (this fragment comes from inside the
# training loop; train_loss/train_acc and the tqdm bars tbar/vbar/lrbar are
# defined earlier in the same function).
valid_loss, valid_acc = eval_net(args, model, validloader, criterion)
vbar.set_description(
    'valid set - average loss: {:.4f}, accuracy: {:.0f}%'.format(
        valid_loss, 100. * valid_acc))

# Optionally append the configuration and this epoch's metrics to a results file.
if args.filename != "":
    with open(args.filename, 'a') as f:
        f.write('%s %s %s %s' % (args.dataset, args.learn_eps,
                                 args.neighbor_pooling_type,
                                 args.graph_pooling_type))
        f.write("\n")
        f.write("%f %f %f %f" % (train_loss, train_acc, valid_loss, valid_acc))
        f.write("\n")

lrbar.set_description("Learning eps with learn_eps={}: {}".format(
    args.learn_eps, [layer.eps.data.item() for layer in model.ginlayers]))

tbar.close()
vbar.close()
lrbar.close()


if __name__ == '__main__':
    args = Parser(description='GIN').args
    print('show all arguments configuration...')
    print(args)
    main(args)
from lexer import Lexer
from my_parser import Parser
from codegen import CodeGen

if __name__ == '__main__':
    fname = "input.hel"
    with open(fname) as f:
        text_input = f.read()

    lexer = Lexer().get_lexer()
    tokens = lexer.lex(text_input)

    codegen = CodeGen()
    module = codegen.module
    builder = codegen.builder
    printf = codegen.printf

    pg = Parser(module, builder, printf)
    pg.parse()
    parser = pg.get_parser()
    parser.parse(tokens).eval()

    codegen.create_ir()
    codegen.save_ir("output.hll")
def setUp(self):
    self.sut = Crawler(DB.MYSQL, HTML.REDIS,
                       Parser(HTML.REDIS, PARSER.ELASTICSEARCH))
def visit_collectionaccess(self, node):
    collection = self.search_scopes(node.collection.value)
    key = self.visit(node.key)
    if not key:
        key = node.key.value
    return collection[key]

def interpret(self, tree):
    return self.visit(tree)

def visit_print(self, node):
    print(self.visit(node.value))


if __name__ == '__main__':
    from my_lexer import Lexer
    from my_parser import Parser
    from my_preprocessor import Preprocessor

    file = 'test.my'
    code = open(file).read()
    lexer = Lexer(code, file)
    parser = Parser(lexer)
    t = parser.parse()
    symtab_builder = Preprocessor(parser.file_name)
    symtab_builder.check(t)
    if not symtab_builder.warnings:
        interpreter = Interpreter(parser.file_name)
        interpreter.interpret(t)
    else:
        print('Did not run')
class TestParser(unittest.TestCase):
    def setUp(self):
        self.file = 'file.txt'
        self.data = [
            '4 3',
            '<tag1 value = "HelloWorld">',
            '<tag2 name = "Name1">',
            '</tag2>',
            '</tag1>',
            'tag1.tag2~name',
            'tag1~name',
            'tag1~value'
        ]
        self.number_of_tags = 4
        self.number_of_queries = 3
        self.tags = [
            '<tag1 value = "HelloWorld">',
            '<tag2 name = "Name1">',
            '</tag2>',
            '</tag1>'
        ]
        self.tag1_parsed = {'tag1~value': 'HelloWorld'}
        self.tag1_name = 'tag1'
        self.tags_all_parsed = {
            'tag1~value': 'HelloWorld',
            'tag1.tag2~name': 'Name1',
            'tag2~name': 'Name1'
        }
        self.queries = ['tag1.tag2~name', 'tag1~name', 'tag1~value']
        self.queries_answers = ['Name1', 'Not Found!', 'HelloWorld']
        self.Parser = Parser(self.file)

    def test_get_data_from_file(self):
        self.assertEqual(self.data, self.Parser.data)

    def test_get_number_of_tags(self):
        self.assertEqual(self.number_of_tags, self.Parser.number_of_tags)

    def test_get_tags(self):
        self.assertEqual(self.data[1:5], self.Parser.all_tags)

    def test_get_queries(self):
        self.assertEqual(self.data[5:], self.Parser.queries_in_list)

    def test_get_variables_from_tags(self):
        self.assertEqual(self.tags_all_parsed,
                         self.Parser.get_variables_from_tags())

    def test_get_not_nested_tags_names_and_variables(self):
        self.assertEqual(
            self.tag1_parsed,
            self.Parser.get_not_nested_tags_names_and_variables(0))

    def test_get_nested_tags_names_and_variables(self):
        self.assertEqual(self.tags_all_parsed,
                         self.Parser.get_nested_tags_names_and_variables(0))

    def test_get_current_tag_name_as_string(self):
        self.assertEqual(
            self.tag1_name,
            self.Parser.get_current_tag_name_as_string(self.tags[0]))

    def test_get_tag_variables_with_values(self):
        self.assertEqual(
            self.tag1_parsed,
            self.Parser.get_tag_variables_with_values(self.tags[0]))

    def test_is_opening_tag(self):
        self.assertEqual(True, self.Parser.is_opening_tag(self.tags[0]))

    def test_check_if_queries_exist_in_tags(self):
        self.assertEqual(self.queries_answers,
                         self.Parser.check_if_queries_exist_in_tags())
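# Standard unittest entry point for the suite above. Note one assumption,
# inferred from test_get_data_from_file: a file.txt fixture whose lines match
# self.data must exist on disk next to this test module.
if __name__ == '__main__':
    unittest.main()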
from my_parser import Parser


def fuel_process(fuel):
    # Divide by 3
    tmp_fuel = fuel / 3
    # Round down
    tmp_fuel = int(tmp_fuel)
    # Subtract 2
    tmp_fuel = tmp_fuel - 2
    return tmp_fuel


# Read file
my_parser = Parser("./input.txt")

fuel_sum = 0
fuel_down = 0
fuel_calc = 0

# Process each module mass to find the fuel required
for x in my_parser.sequence:
    # Calculate the fuel needed for the module
    fuel_calc = fuel_process(x)
    fuel_sum += fuel_calc
    fuel_down = fuel_calc
    # Calculate the fuel needed by the fuel itself
    while fuel_down > 0:
        fuel_calc = fuel_process(fuel_calc)
        if fuel_calc > 0:
            fuel_sum += fuel_calc
        fuel_down = fuel_calc

print(fuel_sum)
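# A compact recursive equivalent of the inner while loop above, added only as
# an illustration (it is not part of the original script). For example,
# total_fuel(1969) == 966, since 654 + 216 + 70 + 21 + 5 = 966.
def total_fuel(mass):
    fuel = fuel_process(mass)
    if fuel <= 0:
        return 0
    return fuel + total_fuel(fuel)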
def translate(file):
    parser = Parser(file)
    codeWriter = CodeWriter(file)
    while parser.hasMoreCommands():
        if parser.commandType() == const.ARITHMETIC:
            codeWriter.writeArithmetic(parser.command(), parser.vmCommand())
        if parser.commandType() in [const.PUSH, const.POP]:
            codeWriter.writePushPop(parser.commandType(), parser.arg1(),
                                    parser.arg2(), parser.vmCommand())
        parser.advance()
    codeWriter.close()
""" интерфейсный модуль для оркестрации процессами программы. """ import pickle from typing import Dict, List, Union import configs as c from my_parser import Parser from preprocess import Preprocessing as pp from spimi import GetIndex from rank import RankList from boolean_search import BooleanSearch as bs # триггер на запуск парсера if c.CORPUS_PARSER: p = Parser() p.run() # триггер на сборку обратного индекса if c.BUILD_INDEX: idx = GetIndex() idx.save_block_index() idx.combine_block_index() # читаем с диска индекс и хэш с доп.информацией with open('index/full_index.pickle', 'rb') as f: index: Dict[int, List] = pickle.load(f) with open('index/doc_id_doc_name_dict.pickle', 'rb') as f: doc_id_doc_name_dict: Dict = pickle.load(f) while True: print('\n\nInput your boolean query:') query_string: str = input('>')
class ES:
    def __init__(self):
        self.stop_list = self.read_stop()
        self.template = {
            "index_patterns": "hw3",
            "settings": {
                "number_of_replicas": 1,
                "index.highlight.max_analyzed_offset": 2000000,
                "analysis": {
                    "filter": {
                        "english_stop": {
                            "type": "stop",
                            "stopwords": self.stop_list,
                        },
                        "my_snow": {
                            "type": "snowball",
                            "language": "English"
                        }
                    },
                    "analyzer": {
                        # custom analyzer "stopped_stem"
                        "stopped_stem": {
                            "type": "custom",
                            "tokenizer": "standard",
                            "filter": [
                                "lowercase",
                                # custom filter "english_stop"
                                "english_stop",
                                "my_snow"
                            ]
                        }
                    }
                }
            },
            "mappings": {
                "_source": {
                    "enabled": "true"
                },
                "properties": {
                    # fields
                    "http_header": {
                        "type": "keyword"
                    },
                    "title": {
                        "type": "keyword"
                    },
                    "text_content": {
                        "type": "text",
                        "fielddata": "true",
                        "analyzer": "stopped_stem",
                        "index_options": "positions",
                    },
                    "raw_html": {
                        "type": "text",
                        "index": "false"
                    },
                    "in_links": {
                        "type": "keyword"
                    },
                    "out_links": {
                        "type": "keyword"
                    }
                }
            }
        }
        self.hosts = [
            "https://f2ff43d409574698a747eaa43256d1e0.northamerica-northeast1.gcp.elastic-cloud.com:9243/"
        ]
        self.cloud_id = "CS6200:bm9ydGhhbWVyaWNhLW5vcnRoZWFzdDEuZ2NwLmVsYXN0aWMtY2xvdWQuY29tJGYyZmY0M2Q0MDk1NzQ2OThhNzQ3ZWFhNDMyNTZkMWUwJDU1ZTY4MGVhZjQ5MjRmNmM5NmY5YmIxNTRjZTQyN2Fk"
        self.name = "web_crawler"
        self.index = "hw3"
        self.es = Elasticsearch(hosts=self.hosts,
                                timeout=60,
                                clould_id=self.cloud_id,
                                http_auth=('elastic', 'nRGUXlzD1f8kOT63iLehSG9a'))
        self.parser = Parser()

    def initialize(self):
        self.read_stop()
        # self.es.indices.delete(index=self.index)
        self.es.indices.put_template(name=self.name, body=self.template)
        self.es.indices.create(index=self.index)

    def es_control(self):
        self.parser.initialize("./output/")

        # Upload docs and HTTP headers.
        docs, headers = self.parser.doc_parse()
        actions = [{
            "_op_type": "update",
            "_index": self.index,
            "_id": id,
            "doc": {
                "http_header": str(headers[id]),
                "title": "",
                "text_content": docs[id],
                "raw_html": "",
                "in_links": "",
                "out_links": ""
            },
            "doc_as_upsert": "true"
        } for id in docs]
        helpers.bulk(self.es, actions=actions)
        docs, headers = None, None

        # Upload titles.
        titles = self.parser.title_parse()
        actions = [{
            "_op_type": "update",
            "_index": self.index,
            "_id": id,
            "script": {
                "source": """
                    if (ctx._source["title"] == "") {
                        ctx._source["title"] = params["title"]
                    }
                """,
                "lang": "painless",
                "params": {
                    "title": titles[id]
                }
            }
        } for id in titles]
        helpers.bulk(self.es, actions=actions)
        titles = None

        # Upload raw HTML in batches of 2000 documents.
        for i in range(20):
            raw_html = self.parser.html_parse(2000 * i, 2000 * (i + 1))
            actions = [{
                "_op_type": "update",
                "_index": self.index,
                "_id": id,
                "script": {
                    "source": """
                        if (ctx._source["raw_html"] == "") {
                            ctx._source["raw_html"] = params["html"]
                        }
                    """,
                    "lang": "painless",
                    "params": {
                        "html": raw_html[id]
                    }
                }
            } for id in raw_html]
            helpers.bulk(self.es, actions=actions)
            raw_html = None

        # Upload in_links and out_links.
        in_links, out_links = self.parser.links_parse()
        # with open("./output/test_in_links.json", "r") as f:
        #     in_links = json.load(f)
        # with open("./output/test_out_links.json", "r") as f:
        #     out_links = json.load(f)
        actions = [{
            "_op_type": "update",
            "_index": self.index,
            "_id": id,
            "script": {
                "source": """
                    if (ctx._source["in_links"] == "") {
                        ctx._source["in_links"] = params["in_links"]
                    } else {
                        for (int i = 0; i < params["length"]; ++i) {
                            if (ctx._source["in_links"].contains(params["in_links"][i]) == false) {
                                ctx._source["in_links"].add(params["in_links"][i])
                            }
                        }
                    }
                """,
                "lang": "painless",
                "params": {
                    "in_links": in_links[id],
                    "length": len(in_links[id])
                }
            }
        } for id in in_links]
        helpers.bulk(self.es, actions=actions)

        actions = [{
            "_op_type": "update",
            "_index": self.index,
            "_id": id,
            "script": {
                "source": """
                    if (ctx._source["out_links"] == "") {
                        ctx._source["out_links"] = params["out_links"]
                    } else {
                        for (int i = 0; i < params["length"]; ++i) {
                            if (ctx._source["out_links"].contains(params["out_links"][i]) == false) {
                                ctx._source["out_links"].add(params["out_links"][i])
                            }
                        }
                    }
                """,
                "lang": "painless",
                "params": {
                    "out_links": out_links[id],
                    "length": len(out_links[id])
                }
            }
        } for id in out_links]
        helpers.bulk(self.es, actions=actions)

    def read_stop(self):
        stop_list = []
        with open("E:/Will/work/NEU/CS 6200/Python Project/stoplist.txt", "r") as f:
            for line in f.readlines():
                stop_list.append(line.replace("\n", ""))
        return stop_list
from flask import Flask, json, request

from my_parser import Parser
from sql_manager import SQL_Manager

app = Flask(__name__)
parser = Parser()
manager = SQL_Manager()


@app.route('/')
def main():
    return 'OK'


@app.route('/imports', methods=['POST'])
def import_data():
    '''Retrieves data from the request and adds it to the database.

    Returns:
        import_id with a 201 status code if the data was imported,
        or a message with a 400/404 status code if the import failed.
    '''
    data = json.loads(request.data)
    check = parser.check(data)
    if check is not True:
        return check
    return manager.import_data(data)
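# A development-server entry point, added under the assumption that the app is
# not started via the `flask run` CLI; the original snippet does not show how
# it is launched.
if __name__ == '__main__':
    app.run()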
import io


def main():
    with io.open("input_correct.txt", 'r') as fin:
        text = fin.read()

    lexer = Lexer(text)
    parser = Parser(lexer)
    parser.parse()
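# Entry-point guard, assuming this module is executed directly (it mirrors the
# pattern used by the other drivers above); Lexer and Parser are expected to
# come from this project's own modules, whose import lines are not shown here.
if __name__ == '__main__':
    main()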