def setUp(self):
    # Both configs are exercised: the CSW fixture matches the OGC
    # identifier, the geonetwork fixture exercises the ISO identifier.
    yaml_files = [
        'lib/configs/ogc_identifier.yaml',
        'lib/configs/iso_identifier.yaml'
    ]

    # Known CSW GetCapabilities response.
    with open('tests/test_data/cwic_csw_v2_0_2.xml', 'r') as f:
        csw_content = f.read()
    csw_url = ('http://www.mapserver.com/cgi?SERVICE=WCS'
               '&VERSION=2.0.2&REQUEST=GETCAPABILITIES')
    csw_content = csw_content.replace('\\n', '')
    self.csw_identifier = Identify(yaml_files, csw_content, csw_url,
                                   parser=Parser(csw_content))

    # Geonetwork document with mismatched namespacing (ISO, not CSW).
    with open('tests/test_data/geonetwork_iso_NOT_csw.xml', 'r') as f:
        iso_content = f.read()
    iso_url = ('http://catalog.data.gov/harvest/object/'
               'd5de6dde-3042-4daf-b4ba-95e21e3ab343')
    iso_content = iso_content.replace('\\n', '')
    self.iso_identifier = Identify(yaml_files, iso_content, iso_url,
                                   parser=Parser(iso_content))
def test_parse_bike_page(self):
    """Follow the first stolen-bike link and verify the detail page parses.

    Improvement: asserts the condition directly instead of building a
    boolean flag with an if-statement.
    """
    page = PageLoader()
    listing_html = page.get_page().text
    links = Parser(listing_html).get_stolen_bikes()
    # Load the first bike's detail page and parse it.
    detail_html = page.get_page_by_url(links[0]).text
    bike_info = Parser(detail_html).parse_bike_page()
    # At least one piece of bike info should come back.
    self.assertTrue(len(bike_info) > 0)
def main(workflow):
    """Dispatch a cheat-sheet query to the appropriate Options handler.

    Args:
        workflow: Workflow object; args carry the user query, stored data
            carries the configuration.

    Returns:
        -1 when no configuration is stored, None otherwise.
    """
    query = "" if len(workflow.args) == 0 else workflow.args[0]
    config = workflow.stored_data("configuration")
    # Fixed: compare to None with `is`, not `==` (PEP 8).
    if config is None:
        Options.warning(
            "Didn't find your configuration",
            "Please supply your cheat sheet path using 'cf ~/your/path'",
            workflow)
        workflow.send_feedback()
        return -1
    parser = Parser(config.getPath())
    options = Options(parser, workflow)
    # Split into at most two tokens (sheet name, search term) and drop empties.
    tokens = query.strip().split(" ", 1)
    tokens = [i.strip() for i in tokens if i != ""]
    if len(tokens) < 2:
        sheetName = "" if len(tokens) == 0 else tokens[0]
        # Unknown sheet names fall back to listing the available sheets.
        handler = (options.showAvailable
                   if sheetName not in parser.availableSheets()
                   else options.list)
        handler(sheetName)
    else:
        sheetName = tokens[0]
        searchTerm = tokens[1]
        if sheetName == "--search":
            # Global search across all sheets.
            options.search(None, searchTerm)
        else:
            options.search(sheetName, searchTerm)
    workflow.send_feedback()
    return None
def run_compiler(source, target, debug=False):
    """Compile a source file into a native executable via intermediate C.

    Arguments:
        source: The source file to compile.
        target: The destination binary executable file.
        debug: If True, verbose parsing details are shown. (Default: False)

    Returns:
        True on success, False otherwise.
    """
    # Intermediate C code is written to a fixed temporary location.
    tmp_code_file = './ir.c'

    # Parse the input source into the temporary C file.
    parser = Parser(debug)
    if not parser.parse(source, tmp_code_file):
        print('Error while parsing "%s"' % source)
        return False

    # Compile the intermediate file with gcc (32-bit) into the target.
    if subprocess.call(['gcc', '-m32', '-o', target, tmp_code_file]) != 0:
        print('Error while compiling "%s"' % target)
        return False

    return True
def run_compiler(input_name, output_name):
    """Run the full pipeline: parse, analyze, flow-graph, code generation.

    Exits the process with status 1 on a missing input file or a
    CompilerError raised by any pipeline stage.
    """
    parser = Parser()
    analyzer = StaticAnalyzer()
    flow_generator = FlowGraph()
    code_generator = CodeGenerator()

    try:
        with open(input_name, 'r') as f:
            source_code = f.read()
    except FileNotFoundError:
        logging.error("File not found")
        exit(1)

    try:
        # Group tokens into syntactical units.
        parse_tree = parser.parse(source_code)
        # Perform semantic analysis.
        symtab, ast = analyzer.analyze(parse_tree)
        # Build the control-flow graph from the AST.
        flow_graph = flow_generator.generate(ast)
        # Generate code and write it to the output file.
        code = code_generator.generate(flow_graph, symtab)
        with open(output_name, 'w') as f:
            f.write(str(code))
    except CompilerError as error:
        if str(error):
            logging.error("COMPILER_ERROR: {0}".format(str(error)))
        exit(1)
def _parse_calendars_process(input_tuple: (int, int, (int, str, str), datetime, dict)) -> dict:
    """Parse one calendar's downloaded HTML and collect its event URLs.

    Args:
        input_tuple: (input_index, total_length, (calendar_id, calendar_url,
            calendar_html_file_path), timestamp, website_base).

    Returns:
        {calendar_id: [(event_url, timestamp), ...]}; the list is empty
        when the HTML file does not exist.
    """
    input_index, total_length, calendar_tuple, timestamp, website_base = input_tuple
    calendar_id, calendar_url, calendar_html_file_path = calendar_tuple

    file = os.path.basename(calendar_html_file_path)
    debug_output = "{}/{} | {}/{}".format(input_index, total_length,
                                          website_base["domain"], file)

    if not os.path.isfile(calendar_html_file_path):
        debug_output += " | 0 (File '{}' does not exist!)".format(calendar_html_file_path)
        print(debug_output)
        return {calendar_id: []}

    with open(calendar_html_file_path, encoding="utf-8") as html_file:
        dom = etree.parse(html_file, etree.HTMLParser())

    parser = Parser(website_base["parser"])
    parser.set_dom(dom)
    event_urls = parser.get_event_urls()

    # Fixed: the original enumerated event_urls but never used the index.
    # Resolve each (possibly relative) URL against the calendar URL.
    events_to_insert = [(urllib.urljoin(calendar_url, url_path), timestamp)
                        for url_path in event_urls]

    debug_output += " | {}".format(len(events_to_insert))
    print(debug_output)

    return {calendar_id: events_to_insert}
def setUp(self):
    # Small XML fixture with mixed inline markup for the parser under test.
    xml_fixture = '''<xml>
<node>Does it parse? <br/> It <em>should</em>!</node>
<nextnode>Wow, that's a typography sin right there, but <a href="#anchor">Nope</a> and <span>Many XML</span>.</nextnode>
</xml>
'''
    self.parser = Parser(xml_fixture)
def output_pharser_result(format_file_input, data_file_input):
    """Build a parser from the format file, load the data file, and return
    the parsed result as an object.

    NOTE(review): the public name keeps the historical "pharser" spelling
    so existing callers are unaffected.
    """
    parser = Parser(mockFileName(format_file_input))
    parser.loadData(mockFileName(data_file_input))
    return parser.asObject()
def test_get_pages(self):
    """The landing page should expose at least one pagination link.

    Improvement: asserts the condition directly instead of building a
    boolean flag with an if-statement.
    """
    page = PageLoader()
    response = page.get_page()
    links = Parser(response.text).get_pages()
    self.assertTrue(len(links) > 0)
def main():
    """Lex, parse, and interpret the sample CFPL source file test5.cfpl."""
    with open("src/sample-source-codes/test5.cfpl", "r") as file:
        # Improvement: join instead of quadratic `+=` in a loop. Each line
        # is stripped of leading whitespace and terminated with the same
        # separator literal the original used.
        text = "".join(line.lstrip() + "\\n" for line in file.readlines())
    lexer = Lexer(text)
    parser = Parser(lexer)
    interpreter = Interpreter(parser)
    interpreter.interpret()
def main():
    """Read queries from stdin and feed them to the parser until 'exit'."""
    parser = Parser()
    # ms.printDB('lpmdb.bin')
    # ms.json_to_lpmdb_bin('lpmdb.json', 'lpmdb.bin')
    # mv = ms.readMovieByPos('lpmdb.bin', 11616)
    # print(mv.title)
    # parser.parse(':: from title filter the')

    # parse() returns False once the user enters "exit".
    while parser.parse(input()):
        pass
def test_without_typedef():
    """A struct declared without a typedef should still parse."""
    source = """
    //comment
    struct FirstName
    {
        int foo;
        float bar[1];
    } NameAlias;
    """
    structure = Parser(Lexer(source)).parse()
    assert structure is not None
def setUp(self):
    # WFS 1.1.0 GetCapabilities fixture against the complex identifier config.
    yaml_file = 'tests/test_data/complex_identifier_test.yaml'
    url = ('http://www.mapserver.com/cgi?SERVICE=WFS'
           '&VERSION=1.1.0&REQUEST=GETCAPABILITIES')
    with open('tests/test_data/wfs_v1_1_0.xml', 'r') as f:
        content = f.read().replace('\\n', '')
    self.identifier = Identify([yaml_file], content, url,
                               parser=Parser(content))
def __init__(self):
    # Collaborators: CLI arguments, the Blender bridge, and the file parser.
    self.arguments = Arguments()
    self.blender = Blender()
    self.parser = Parser()
    self.botfile = self.arguments.getBotFile()

    # Start from a clean selection before building.
    self.blender.unselectEverything()
    print("\nNow building " + self.botfile + "...")

    # Parse the bot description plus the cube database, then build the bot.
    self.cubeData = self.parser.parseBotFile(self.botfile)
    self.cubedatabase = self.parser.parseCSVFile("assets/cubes.csv")
    self.blender.build(self.cubeData, self.cubedatabase)
    print("done!")
def test_multiple_names():
    """A typedef'd struct should report the alias as its name."""
    source = """
    //comment
    typedef struct StructName
    {
        int foo;
        float bar[MAX_SIZE];
    } StructNameAlias;
    """
    structure = Parser(Lexer(source)).parse()
    assert structure.name == 'StructNameAlias'
def main():
    """Lex, parse, and interpret every sample CFPL source file."""
    samples_dir = f"{BASE_DIR}/src/sample-source-codes"
    for file_name in os.listdir(samples_dir):
        # Fixed: the listing used BASE_DIR but the open used a cwd-relative
        # path, breaking whenever cwd != BASE_DIR. Both now share one base.
        with open(f"{samples_dir}/{file_name}", "r") as file:
            # Improvement: join instead of quadratic `+=` in a loop.
            text = "".join(line.lstrip() + "\\n" for line in file.readlines())
        lexer = Lexer(text)
        parser = Parser(lexer)
        interpreter = Interpreter(parser)
        interpreter.interpret()
def setUp(self):
    # THREDDS catalog fixture against the thredds identifier config.
    yaml_file = 'lib/configs/thredds_identifier.yaml'
    url = 'http://stellwagen.er.usgs.gov/thredds/catalog/TSdata/catalog.xml'
    with open('tests/test_data/mod_stellwagen.xml', 'r') as f:
        content = f.read().replace('\\n', '')
    self.identifier = Identify([yaml_file], content, url,
                               parser=Parser(content))
def test_if_returning_iso_protocol_for_chunk(self):
    """An invalid ISO chunk must not be identified as ISO-19115."""
    with open('tests/test_data/invalid_iso_chunk.xml', 'r') as f:
        content = f.read().replace('\\n', '')
    url = 'http://www.mapserver.com/some_iso'
    identifier = Identify([self.yaml_file], content, url,
                          parser=Parser(content))
    identifier.identify()
    self.assertFalse(identifier.protocol == 'ISO-19115')
def setUp(self):
    # Inline THREDDS catalog (version 1.0.2) exercised against the combined
    # version identifier config; note no pre-built parser is passed here.
    yaml_file = 'tests/test_data/combined_version_identifier_test.yaml'
    url = 'http://www.unidata.com/hyrax/thredds'
    content = '''<catalog xmlns="http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0 http://www.unidata.ucar.edu/schemas/thredds/InvCatalog.1.0.2.xsd" version="1.0.2" name="Actinic Flux measurements during OASIS Barrow field intensive Spring 2009"></catalog>'''
    self.parser = Parser(content)
    self.identifier = Identify([yaml_file], content, url)
    self.identifier.identify()
def setUp(self):
    # ESRI WMS 1.3.0 capabilities fixture against the complex identifier config.
    yaml_file = 'tests/test_data/complex_identifier_test.yaml'
    url = ('http://www.mapserver.com/cgi?SERVICE=WMS'
           '&VERSION=1.3.0&REQUEST=GETCAPABILITIES')
    fixture = 'tests/test_data/esri_wms_35bd4e2ce8cd13e8697b03976ffe1ee6.txt'
    with open(fixture, 'r') as f:
        content = f.read().replace('\\n', '')
    self.identifier = Identify([yaml_file], content, url,
                               parser=Parser(content))
def test_comment_before_bracket():
    """Comments adjacent to the braces must not break struct parsing."""
    source = """
    //comment
    typedef struct // comment before opening bracket
    {
        int a;
        unsigned long long int b;
        /* comment before closing bracket */
    } StructName;
    """
    structure = Parser(Lexer(source)).parse()
    assert structure is not None
def setUp(self):
    """Initialize the parser with encoded, CDATA-removed input.

    The fixture mirrors what the solr sample parser emits, blending the
    parser-side '\\n' handling with the CDATA handling from the solr
    response, so the parser is initialized with a file reflecting both.
    """
    fixture = 'tests/test_data/basic_osdd_c1576284036448b5ef3d16b2cd37acbc.txt'
    with open(fixture, 'r') as f:
        data = f.read()
    self.parser = Parser(data.replace('\\n', ' '))
def test_if_returning_iso_protocol_for_ds(self):
    """An ISO-19115 DS document should report protocol, version, and metadata.

    Fixed: removed the stray Python-2-only debug statement
    `print identifier.to_json()`, which is a syntax error on Python 3.
    """
    with open('tests/test_data/iso-19115_ds.xml', 'r') as f:
        content = f.read()
    url = 'http://www.mapserver.com/some_iso'
    content = content.replace('\\n', '')
    parser = Parser(content)
    identifier = Identify([self.yaml_file], content, url, **{'parser': parser})
    identifier.identify()
    self.assertTrue(identifier.protocol == 'ISO-19115 DS')
    self.assertTrue(identifier.version == 'ISO19115 2003/Cor.1:2006')
    self.assertTrue(identifier.has_metadata)
def setUp(self):
    # data.gov RDF fixture identified against all four identifier configs.
    fixture = 'tests/test_data/datagov_9bcffa1c-6164-4635-bc2c-6c98cce59d7b.rdf'
    with open(fixture, 'r') as f:
        content = f.read()
    content = content.replace('\\n', '')
    url = 'http://catalog.data.gov/9bcffa1c-6164-4635-bc2c-6c98cce59d7b.rdf'
    config_files = [
        'lib/configs/iso_identifier.yaml',
        'lib/configs/ogc_identifier.yaml',
        'lib/configs/oaipmh_identifier.yaml',
        'lib/configs/rdf_identifier.yaml'
    ]
    self.identifier = Identify(config_files, content, url,
                               parser=Parser(content))
def main(grammarFile, tokenizer, testString):
    """Find the smallest edit distance at which testString matches the grammar.

    Returns (match, distance): a matching string within `distance` edits of
    testString, or (None, INF) when the grammar is empty. The distance
    returned is an upper bound established by the search below.
    """
    parser = Parser(grammarFile, tokenizer)
    testString = parser.process_input(testString)
    print('tokenization: ' + ' '.join(map(lambda s: s.replace('\'', ''), (testString))))
    # CNF is required before intersecting with the Levenshtein automaton.
    parser.cfg.convertToCNF(allow_recursive_start=True,
                            allow_mixed_terms=False,
                            allow_epsilon_rules=True,
                            allow_unit_rules=True)
    # Empty language: nothing can ever match.
    if parser.cfg.findMember() == None:
        return None, INF

    def runTest(k):
        # Does any string within edit distance k of testString belong to
        # the grammar? Returns a member of the intersection, or None.
        lev = levenshtein_automata(testString, k)
        inter = intersect(parser.cfg, lev)
        return inter.findMember()

    # Exact match: distance 0.
    if runTest(0) != None:
        return testString, 0
    # Exponential search for an upper bound on the distance:
    # mn is always a failing distance (exclusive), mx a candidate (inclusive).
    mn = 0  # exclusive
    mx = 1  # inclusive
    match = runTest(mx)
    while match == None:
        mn = mx
        mx *= 2
        match = runTest(mx)
    maxMatch = match
    # Binary search between the failing bound mn and the succeeding bound mx,
    # keeping the most recent successful match.
    while mx - mn > 1:
        h = (mx + mn) // 2
        match = runTest(h)
        if match == None:
            mn = h
        else:
            mx = h
            maxMatch = match
    return (maxMatch, mx)
def conn_string(self, conn, data, addr):
    """Handle a request received from the listening server socket.

    Parses the request, consults the cache when enabled, and forwards
    uncached requests to the origin web server.

    Fixed: Python-2-only `except Exception, e` / `print e` replaced with
    the `as e` / `print(e)` forms, which work on Python 2.6+ and 3.

    :param conn: connection socket
    :param data: request data
    :param addr: socket address
    """
    request, b = Parser().http_to_dict(data)
    # Cache lookup is optional; when disabled every request is forwarded.
    if self.do_cache:
        has, cache = Cache().there_is_cache(data)
    else:
        has = False
        cache = ''
    if not has:
        try:
            url = request['path']
            http_pos = url.find("://")  # position of "://", -1 when absent
            if http_pos == -1:
                temp = url
            else:
                temp = url[(http_pos + 3):]  # strip the scheme prefix
            port_pos = temp.find(":")  # explicit port, if any
            webserver_pos = temp.find("/")  # end of the host part
            if webserver_pos == -1:
                webserver_pos = len(temp)
            if port_pos == -1 or webserver_pos < port_pos:
                # No explicit port: default to HTTP port 80.
                port = 80
                webserver = temp[:webserver_pos]
            else:
                # host:port form — parse the port between ":" and "/".
                port = int((temp[(port_pos + 1):])[:webserver_pos - port_pos - 1])
                webserver = temp[:port_pos]
            self.proxy_server(webserver, port, conn, data, addr)
        except Exception as e:
            # Deliberately broad best-effort handling: log and drop.
            print(e)
            pass
def main(workflow):
    """Dispatch a cheat-sheet query to the matching Options handler.

    Returns -1 when no configuration is stored, None otherwise.
    """
    # Configuration is read from local disk; bail out early if missing.
    config = workflow.stored_data("configuration")
    if config is None:
        Options.warning(
            "Didn't find your configuration",
            "Please supply your cheat sheet path using 'cf ~/your/path'",
            workflow)
        workflow.send_feedback()
        return -1

    parser = Parser(config.getPath())
    # Options mutates workflow state as a side effect of each handler call.
    options = Options(parser, workflow)

    # Everything after "cheat" arrives as one argument: split into at most
    # two tokens (sheet name, search term) and drop empties.
    raw_query = "" if len(workflow.args) == 0 else workflow.args[0]
    tokens = [t.strip() for t in raw_query.strip().split(" ", 1) if t != ""]

    if not tokens:
        # Bare query: show everything available.
        options.showAvailable()
    elif len(tokens) == 1:
        only = tokens[0]
        if only == "--search":
            Options.hint("Globally searching for ...?", "In global mode", workflow)
        elif only not in parser.availableSheets():
            # Unknown sheet: treat the token as a filter over available sheets.
            options.showAvailable(only)
        else:
            options.list(only)
    else:
        # Two tokens: "--search term" searches globally, otherwise within a sheet.
        sheet = None if tokens[0] == "--search" else tokens[0]
        options.searchInSheetByKeyword(sheet, tokens[1])

    workflow.send_feedback()
    return None
def _parse_calendars_process(input_tuple: (int, int, (int, str, str), datetime, dict)) -> dict:
    """Parse one calendar's downloaded HTML and collect its event URLs.

    Args:
        input_tuple: (input_index, total_length, (calendar_id, calendar_url,
            calendar_html_file_path), timestamp, website_base).

    Returns:
        {calendar_id: [(event_url, timestamp), ...]}; the list is empty
        when the HTML file does not exist.
    """
    simple_logger = logging.getLogger(SIMPLE_LOGGER_PREFIX + __file__)
    input_index, total_length, calendar_tuple, timestamp, website_base = input_tuple
    calendar_id, calendar_url, calendar_html_file_path = calendar_tuple
    file = os.path.basename(calendar_html_file_path)
    # Progress prefix reused by every log line below.
    info_output = "{}/{} | {}/{}".format(input_index, total_length,
                                         website_base["domain"], file)
    if not os.path.isfile(calendar_html_file_path):
        simple_logger.warning(info_output + " | 0 (File '{}' does not exist!)".format(
            calendar_html_file_path))
        return {calendar_id: []}
    with open(calendar_html_file_path, encoding="utf-8") as html_file:
        dom = etree.parse(html_file, etree.HTMLParser())
    parser = Parser(website_base["parser"])
    parser.set_dom(dom)
    events_to_insert = []
    for index, url_path in enumerate(parser.get_event_urls()):
        event_url = url_path
        # Relative URLs (no netloc) are resolved against the calendar URL.
        # NOTE(review): `urllib.urlparse`/`urljoin` suggests a Python-2-style
        # import alias — confirm against this module's imports.
        if not bool(urllib.urlparse(event_url).netloc):
            event_url = urllib.urljoin(calendar_url, url_path)
        events_to_insert.append((event_url, timestamp))
    simple_logger.info(info_output + " | {}".format(len(events_to_insert)))
    if len(events_to_insert) == 0:
        # Nothing found: surface the parser's own error messages, if any.
        if len(parser.error_messages) != 0:
            simple_logger.debug("Parser's errors: {}".format(
                json.dumps(parser.error_messages, indent=4)))
    else:
        simple_logger.debug("Found URLs: {}".format(
            json.dumps([event_url for event_url, _ in events_to_insert], indent=4)))
    return {calendar_id: events_to_insert}
def test_correct_syntax():
    """A well-formed typedef'd struct yields name, description, and members."""
    source = """
    //comment
    typedef struct
    {
        int a;
        float b;
        /* some "multiline" comment */
        struct
        {
            long double c;
        } inner_struct_name;
        signed short d[123];
        unsigned char e;
    } StructName;
    """
    structure = Parser(Lexer(source)).parse()
    assert structure.name == "StructName"
    assert structure.description == "comment"
    assert len(structure.variables) == 5
def test_rdf_language(self):
    """A French RDF document is identified as RDF with language 'fr'.

    Fixed: removed the stray Python-2-only debug statement
    `print identifier.to_json()`, which is a syntax error on Python 3.
    """
    with open(
            'tests/test_data/rdf_french_ed14b44e96042ad56c11cc0ca3768979.xml',
            'r') as f:
        content = f.read()
    url = 'http://catalog.data.gov/9bcffa1c-6164-4635-bc2c-6c98cce59d7b.rdf'
    content = content.replace('\\n', '')
    parser = Parser(content)
    identifier = Identify([
        'lib/configs/iso_identifier.yaml',
        'lib/configs/ogc_identifier.yaml',
        'lib/configs/oaipmh_identifier.yaml',
        'lib/configs/rdf_identifier.yaml'
    ], content, url, **{'parser': parser})
    identifier.identify()
    self.assertTrue(identifier.protocol == 'RDF')
    self.assertTrue(identifier.language == 'fr')