def parse_mountain_area():
    p = Parser("/AIP/ENR/ENR%201/ES_ENR_1_1_en.pdf")
    #alongborder="610213N 0114917E - 632701N 0114917E - 661457N 0141140E - 682200N 0173441E - 683923N 0183004E - 683141N 0194631E - 690945N 0202604E - 683533N 0221411E - 680424N 0233833E - 670159N 0240734E - 663602N 0240455E - "
    areas = []
    for pagenr in xrange(p.get_num_pages()):
        #print "Processing page %d"%(pagenr,)
        page = p.parse_page_to_items(pagenr)
        lines = page.get_lines(page.get_all_items())
        allofit = " ".join(lines)
        allofit = allofit.replace(
            u"along the Swedish/Norwegian and Swedish/Finnish border to",
            u"Along the common X/Y state boundary to")
        allofit = allofit.replace(u"–", "-")
        coordarea = re.match(
            ur".*Mountainous\s+area\s+of\s+Sweden.{1,10}lateral\s+limits(.*?)AIRAC.*",
            allofit)
        if coordarea:
            points = []
            txt, = coordarea.groups()
            print "area:<", txt, ">"
            points = mapper.parse_coord_str(txt, context="sweden")
            assert len(points) > 3
            print "Point:", len(points)
            areas.append(
                dict(name="Mountainous Area",
                     floor="GND",
                     ceiling="UNL",
                     points=points,
                     type="mountainarea",
                     freqs=[]))
    print len(areas)
    assert len(areas) == 1
    return areas
def execute(self):
    self._pool = Pool(config.pool_processors)
    self._thread_pool = ThreadPool(5)
    cli_stats = CliStats(self._context, self._writer)
    cli_stats.execute()
    self.clean_up_docker()
    logging.info(config.log_line_run_end + self._context.run_name)
    _flush_log_handlers()
    _extract_from_file(config.log_file, config.run_log,
                       config.log_line_run_start + self._context.run_name,
                       config.log_line_run_end + self._context.run_name)
    parser = Parser(self._context, self._writer)
    parser.execute()
    _collect_general_information()
    self._context.step_times.append(
        StepTimes(time.time(), 'postprocessing_end'))
    self._writer.write_csv(config.step_times_csv_file_name,
                           StepTimes.csv_header, self._context.step_times)
    _create_report()
    self._pool.close()
    self._thread_pool.close()
    logging.info('Executed post processing')
def run_parser(self, file_path):
    """Call parse_network() on the path specified.

    To do so, first reinitialize all modules and cycles_completed.
    """
    # clear everything at the beginning
    self.cycles_completed = 0
    self.names = Names()
    self.devices = Devices(self.names)
    self.network = Network(self.names, self.devices)
    self.monitors = Monitors(self.names, self.devices, self.network)
    self.scanner = Scanner(file_path, self.names)
    self.parser = Parser(self.names, self.devices, self.network,
                         self.monitors, self.scanner)

    # Capture the stdout from parse_network()
    captured_stdout = io.StringIO()
    with redirect_stdout(captured_stdout):
        if self.parser.parse_network():
            self.parse_success = True
            self.log_message(_("Successfully parsed network."))
        else:
            self.parse_success = False
            self.log_message(_("Failed to parse network."))

    # Show error messages captured in activity log
    self.log_message(captured_stdout.getvalue(), self.MONOSPACE_FONT)
def yield_parse_result(line):
    std_operator_parser = Parser('{indent}std::operator{op}({params});')
    if (result := std_operator_parser.parse(line)) and result.named['op'] in STREAM_OPERATORS:
        indent, op, params = result.named['indent'], ' ' + result.named['op'], result.named['params']
        return indent + op.join(params.split(',')) + ';'
def run():
    data = open_file(argv[1])
    lexer = Lexer(data)
    tokens = lexer.lex()
    parser = Parser(tokens)
    parser.parse()
def main():
    url = str(sys.argv[-3])
    url_for_proxy_check = str(sys.argv[-2])
    path = sys.argv[-1]
    chrome = webdriver.Chrome()
    chrome.get(url)
    hide = HidemyName(chrome)
    data = hide.main()
    extract = Parser(data, path)
    result = extract.excute()
    """
    >>> result = {
            "address": ["1.1.1.1","2.2.2.2"],
            "port": ["3128","8080"],
            "country": [],...
        }
    >>> proxy_list = [n+":"+p for n,p in zip(result['address'],result['port'])]
    >>> proxy_list
    ["1.1.1.1:3128","2.2.2.2:8080"]
    """
    proxy_list = [
        n + ":" + p for n, p in zip(result['address'], result['port'])
    ]
    print("CHECKING PROXIES FOR %s ".center(80, "#") % url_for_proxy_check)
    final = check(url_for_proxy_check, proxy_list)
    df = pd.DataFrame(final).to_csv(path, index=False)
    print("Done".center(80, "-"))
def yield_parse_result(line1, line2):
    subsitution_parser = Parser('{}v{var_name} = {value};')
    if (result := subsitution_parser.parse(line1)) and 'v' + result.named['var_name'] in line2 and any(
            op in line2 for op in STREAM_OPERATORS):
        var_name, value = result.named['var_name'], result.named['value']
        return '', line2.replace('v' + var_name, value)
def evaluate(expression: str, is_rad: bool, is_binary: bool) -> str:
    """Evaluates a mathematical expression passed as a string and returns
    the result as another string.

    Args:
        expression (str): Expression to evaluate
        is_rad (bool): Determines if in radian mode
        is_binary (bool): Determines if the input and output is in binary

    Returns:
        str: Result of evaluation of expression
    """
    if exp_is_blank(expression):
        return ""
    parser = Parser(is_rad, is_binary)
    try:
        # Make implicit multiplications between bracketed items explicit.
        expression = re.sub('(?<=\d|\))(\()', '*(', expression)
        # Ensure that characters used can be read by parser.
        # Map Euler's constant to the letter E when not surrounded by other letters.
        expression = re.sub('(?<![a-zA-Z])e(?![a-zA-Z])', 'E', expression)
        expression = expression.replace('π', 'PI')
        expression = expression.replace('√', 'sqrt')
        # Evaluate expression
        evaluation = parser.evaluate(expression)
        if is_binary:
            evaluation = display.decimal_to_binary(evaluation)
        return evaluation
    except Exception as e:
        return str(e)
def type(self, string):
    '''
    @param string: 'string' | 'int' | 'float' | 'bool' | 'dict' | 'array'
    '''
    tp = type(self.__param)
    if string == 'string':
        return tp == types.StringType
    elif string == 'int':
        tmp = Parser.int(self.__param, None)
        if tmp != None:
            self.__param = tmp
            tp = type(self.__param)
        return tp == types.IntType
    elif string == 'float':
        tmp = Parser.float(self.__param, None)
        if tmp != None:
            self.__param = tmp
            tp = type(self.__param)
        return tp == types.FloatType
    elif string == 'bool':
        return tp == types.BooleanType
    elif string == 'dict':
        return tp == types.DictionaryType
    elif string == 'array':
        return tp == types.ListType
    return False
def solve(self, expr: str = ""):
    """Solve a cryptarithm problem."""
    print("Problem: {}".format(expr))
    p = Parser(tokenize(expr))
    pr = Problem(p.parse())
    print(pr.search_all_solution())
def test_magnesium_hydroxide(self):
    p = Parser()
    self.assertEqual(p.parse_molecule(magnesium_hydroxide), {
        'Mg': 1,
        'O': 2,
        'H': 2
    })
class Crawler():

    def __init__(self, params):
        self.params = params
        self.log = {"fetching": None, "crawling": None}
        self.results = None
        self._request = None
        self._parser = None

    def get_all(self):
        self._request = RequestSinglePage(params=self.params)
        self._request.get()
        self.log["fetching"] = self._request.log
        if not self._request.log.successful:
            self.results = None
            return None
        self._parser = Parser(self._request.page_content)
        self._parser.extract_fields()
        self.results = self._parser.results
        self.log["crawling"] = self._parser._log
        self.results_df = pd.DataFrame(self.results)

    def get_pandas_df(self):
        return self.results_df
def test_numbers(self):
    tokens = [
        Token(TokenType.NUMBER, 27),
        Token(TokenType.PLUS),
        Token(TokenType.NUMBER, 14)
    ]
    node = Parser(tokens).parse()
    self.assertEqual(node, AddNode(NumberNode(27), NumberNode(14)))

    tokens = [
        Token(TokenType.NUMBER, 27),
        Token(TokenType.MINUS),
        Token(TokenType.NUMBER, 14)
    ]
    node = Parser(tokens).parse()
    self.assertEqual(node, SubtractNode(NumberNode(27), NumberNode(14)))

    tokens = [
        Token(TokenType.NUMBER, 27),
        Token(TokenType.MULTIPLY),
        Token(TokenType.NUMBER, 14)
    ]
    node = Parser(tokens).parse()
    self.assertEqual(node, MultiplyNode(NumberNode(27), NumberNode(14)))

    tokens = [
        Token(TokenType.NUMBER, 27),
        Token(TokenType.DIVIDE),
        Token(TokenType.NUMBER, 14)
    ]
    node = Parser(tokens).parse()
    self.assertEqual(node, DivideNode(NumberNode(27), NumberNode(14)))
def ingest_data_and_respond(data, log):
    log.info('Got POSTed data: {}'.format(data))
    timeseries_df, err = Parser.validate_parse_consumption_data(data, log)
    if err != '':
        return err, HTTPStatus.BAD_REQUEST
    ship_id = data['spaceship_id']
    units = data['units'].lower()
    if units == 'kwh':
        timeseries, err = Parser.split(timeseries_df, log)
    elif units == 'kw':
        timeseries, err = Parser.convert_and_split(timeseries_df, log)
    if err != '':
        return err, HTTPStatus.BAD_REQUEST
    log.debug('Saving dataframe: {} \nfor ship_id: {}'.format(
        timeseries,
        ship_id,
    ))
    if not DBManager.save_energy_entry(ship_id, timeseries):
        log.error('db save failed for ship: {}'.format(ship_id))
        log.error('timeseries: {}'.format(timeseries))
        res = DBManager.get_full_energy_entry(ship_id)
        log.error('full ship info in db before fail: {}'.format(res))
        return 'DB error', HTTPStatus.SERVICE_UNAVAILABLE
    return 'Data saved successfully for ship {}'.format(ship_id)
def __init__(self):
    self.pa = Parser()
    self.pp = PinYin()
    self.pp.load_word()
    with open(os.path.join(os.path.dirname(__file__), 'pinyin_dict'), 'r') as ff:
        line = ff.readline()
        self.jj_dict = json.loads(line)
        ff.close()
def test_fremy_salt(self):
    p = Parser()
    self.assertEqual(p.parse_molecule(fremy_salt), {
        'K': 4,
        'O': 14,
        'N': 2,
        'S': 4
    })
def __init__(self, args):
    Parser.__init__(self, args)
    self.heuristique = args.heur
    self.size = self.size
    self.solvable = True
    self.start_map = np.zeros(self.size)
    self.final_map = np.zeros(self.size)
    self.curent_state = (np.zeros(self.size), self.size * self.size)
def __init__(self, db_config, create_new_tables=False):
    self._db = Postgres_db(db_config)
    self._parser = Parser()
    if create_new_tables is True:
        try:
            self._db.drop_existing_tables_from_db()
        except Exception as e:
            logging.exception(e)
        self._db.create_tables()
def __init__(self, config):
    self.logger = logging.getLogger()
    self.connector = Connector(config.server.host,
                               config.server.port,
                               config.server.enablessl,
                               config.server.crtfile,
                               config.server.timeout,
                               )
    self.parser = Parser(config.main.serializer)
def add():
    expression = text = request.form['expression']
    p = Parser(expression)
    value = p.getValue()
    now = datetime.utcnow()
    db.session.add(Expression(text=expression, value=value, now=now))
    db.session.commit()
    return redirect(url_for('index'))
def intermediate(code):
    parser = Parser(code)
    block = parser.parse()
    # print(block)
    fd = {'params': {'var': True, 'params': []}, 'block': block}
    info = new_func_info(None, fd)
    info.add_local_var('_ENV')
    cg_func_def_exp(info, fd, 0)
    # print(info.sub_funcs[0].ins)
    return info
def repl_core(input_file, output_file):
    base_scope = create_built_in_scope()
    scope = Scope(base_scope)
    parser = Parser(file_token_stream(input_file))
    for paragraph in parser.iter_paragraph(scope):
        result = evaluate(paragraph, scope)
        if isinstance(result, Action):
            result.do(scope)
        else:
            print(result, file=output_file)
def main(args):
    parser = Parser(prog=utils.abs_path('./http_server.py'))
    parser.add_argument("-port", default=gv.cdn_port(),
                        help="port will run: default 8000")
    parser.add_argument("-address", default='',
                        help="address bind, default any")
    parser.add_argument("-path", default=gv.cdn_path(),
                        help="path will run http, default :" + utils.abs_path(gv.cdn_path()))
    arguments = parser.parse_args(args)
    run(arguments.port, arguments.address, arguments.path)

    # parser = argparse.ArgumentParser()
    # parser.add_argument('--cgi', action='store_true',
    #                     help='Run as CGI Server')
    # parser.add_argument('--bind', '-b', default='', metavar='ADDRESS',
    #                     help='Specify alternate bind address '
    #                          '[default: all interfaces]')
    # parser.add_argument('port', action='store',
    #                     default=8000, type=int,
    #                     nargs='?',
    #                     help='Specify alternate port [default: 8000]')
    #
    # args = parser.parse_args()
    # if args.cgi:
    #     handler_class = CGIHTTPRequestHandler
    # else:
    #     handler_class = SimpleHTTPRequestHandler
    # http.server.test(HandlerClass=handler_class, port=args.port, bind=args.bind)
    pass
def preview(id):
    record = db_session.query(Page).get(id)
    if record is not None:
        parser = Parser()
        html = parser.parse(record.data)
        html = html.replace('</body>', '{{ post_body|safe }}</body>')
        last_saved = record.updated.strftime('%B %d, %Y at %I:%M%p')
        post_body = render_template('preview_post_body.html', last_saved=last_saved)
        return render_template_string(html, post_body=post_body),
    return render_template('empty_preview.html')
def parse():
    logger.info("parse")
    storage = Persistor()
    parser = Parser()
    raw_data = storage.read_raw_data(SCRAPPED_FILE)
    data = parser.process_rawdata(raw_data)  # processing raw data
    parsed_files = [parser.parse_object(file) for file in data]  # parsing every object
    storage.save_csv(parsed_files, TABLE_FORMAT_FILE)  # save our data
def main():
    """filename = ''
    try:
        filename = sys.argv[1]
    except Exception as e:
        print(e)"""
    while True:
        lexer = Lexer(text=input('>>> '))
        parser = Parser(lexer)
        print(parser.read())
def test_parser(self):
    cases = [('(1+2)<>3', ['1', '2', '+', '3', '<>']),
             ('1+2*3', ['1', '2', '3', '*', '+']),
             ('1 >= 2', ['1', '2', '>=']),
             ('(1*2+9) = (2*3-6)',
              ['1', '2', '*', '9', '+', '2', '3', '*', '6', '-', '='])]
    parser = Parser()
    for expr, correct in cases:
        self.assertEqual(parser.parse(expr), correct)
def run(s):
    scanner = Scanner(s, error)
    tokens = scanner.scan_tokens()
    parser = Parser(tokens, parse_error)
    statements = parser.parse()
    if hasError:
        return
    if hasRuntimeError:
        return
    interpreter = Interpreter()
    interpreter.interpret(statements)
def load_basic_tools(fname="basic.gl"):
    parser = Parser()
    parser.parse_file(fname)
    tool_dict = parser.tool_dict
    basic_tools = ImportedTools(tool_dict)
    basic_tools.line.add_symmetry((1, 0))
    basic_tools.dist.add_symmetry((1, 0))
    basic_tools.intersection_ll.add_symmetry((1, 0))
    basic_tools.midpoint.add_symmetry((1, 0))
    add_movable_tools(tool_dict, basic_tools)
    return ImportedTools(tool_dict)
def getReply(message):
    p = Parser()
    parse_tree = p.parse(message)
    translation = list()
    find_best_translation(parse_tree, translation)
    print(message)
    print(translation)
    answer = " ".join(translation)
    # return the formulated answer
    return answer
def test_error_location(names, devices, network, monitors, capsys):
    """Test if error detection correctly prints out the location of an error."""
    sc = Scanner(error_location, names)
    parser = Parser(names, devices, network, monitors, sc)
    parser.parse_network()
    captured = capsys.readouterr()
    line_number = "line 10"
    try:
        assert line_number in captured.out
    except AttributeError:
        assert line_number in captured[0]
def ey_parse_airfield(icao):
    spaces = []
    p = Parser("/EY_AD_2_%s_en.pdf" % (icao,), lambda x: x)
    freqs = []
    for nr in xrange(0, p.get_num_pages()):
        page = p.parse_page_to_items(nr)
        if nr == 0:
            #[–-]
            nameregex = ur"\s*%s\s*[–-]\s*(.*?)\s*$" % (icao,)
            print "Nameregex", nameregex
            nameitem = page.get_by_regex(nameregex, re.UNICODE)[0]
            name, = re.match(nameregex, nameitem.text, re.UNICODE).groups()
            name = name.replace("Tarptautinis", "International")
            #print repr(name)
            #sys.exit(1)
            coordhdg, = page.get_by_regex(ur".*ARP\s*koordinat.s.*", re.DOTALL)
            coord = page.get_partially_in_rect(
                coordhdg.x2 + 4, coordhdg.y1 + 0.1, 100, coordhdg.y2 - 0.1)[0]
            pos, = mapper.parsecoords(fixup(coord.text.replace(" ", "")))
            elevhdg, = page.get_by_regex(ur".*Vietos\s*aukštis.*", re.DOTALL)
            elevitem, = page.get_partially_in_rect(
                elevhdg.x2 + 1, elevhdg.y1 + 0.1, 100, elevhdg.y2 - 0.1)
            elev, = re.match(ur"(\d+)\s*FT.*", elevitem.text).groups()
            elev = int(elev)
        for comm in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*", re.DOTALL):
            ends = page.get_by_regex_in_rect(
                ur".*RADIO\s*NAVIGATION.*",
                0, comm.y2, 100, 100)
            if ends:
                end = ends[0].y1 - 0.1
            else:
                end = 100
            freqitems = page.get_by_regex_in_rect(
                ur".*\d{3}\.\d{3}.*",
                0, comm.y2, 100, end - 0.1)
            lastservice = None
            for freq in freqitems:
                service = page.get_partially_in_rect(
                    0, freq.y1 + 0.1, 17, freq.y2 - 0.1)
                if service:
                    lastservice = service[0]
                    print lastservice
                assert len(spaces) == 0
                for freqstr in re.findall(ur"\d{3}\.\d{3}", freq.text):
                    if freqstr != "121.500" and freqstr != "243.000":
                        freqs.append((lastservice.text.split("/")[0], float(freqstr)))
def messageParcer(self):
    logging.debug("messageParcer in thread " + str(thread.get_ident()))
    parser = None
    # get parsing patterns from config file when in testing mode
    if self.testEnabled:
        parser = Parser(self.successPattern, self.failurePattern, self.testEnabled)
    else:
        parser = Parser()
    while self.running:
        msg = queue.get()
        eventLog = parser.parseLogLine(msg)
        if eventLog:
            algorithm.processEventLog(eventLog)
        logging.debug("messages in queue " + str(queue.qsize()) +
                      ", received %r from %s:%d" % (msg.data, msg.host, msg.port))
class ClientEndpoint(object):
    socket = None
    parser = None

    def __init__(self, config):
        connector = Connector(config.server.host,
                              config.server.port,
                              #config.server.keyfile,
                              config.server.crtfile,
                              config.server.timeout,
                              )
        self.socket = connector.connect()
        self.parser = Parser(config.main.serializer)

    def request(self, data):
        pack = self.parser.encode(data)
        self.socket.sendall(pack)
        #self.socket.write(pack)
        response = self.socket.read(1024)
        return self.parser.decode(response)
        #return self.parser.decode(self.socket.read(1024))
        #return self.parser.decode(self._recv())

    def _recv(self, n=1):
        data = ""
        chunk = ""
        while len(data) < n:
            try:
                chunk = self.socket.recv(n - len(data))
                #chunk = self.socket.read(n - len(data))
            except Exception, e:
                print "SSL read failed: %s" % str(e)
            if len(chunk) == 0:
                break
            data += chunk
        print "\033[33mdata: %s\033[0m" % str(data)
        return data
def extract_single_sup(full_url, sup, supname, opening_ours):
    #print getxml("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    ads = []
    try:
        p = Parser(sup)
    except Exception:
        print "Couldn't parse", sup
        #Some AIP SUPs contain invalid XML after conversion from PDF.
        #Skip these for now.
        return []
    areas = []
    startpage = None
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        #print page.get_all_items()
        for item in page.get_by_regex(".*HOURS OF OPERATION.*"):
            lines = page.get_lines(page.get_partially_in_rect(0, item.y1 - 2, 100, item.y2 + 2))
            found = False
            for line in lines:
                if re.match(ur".*SUP\s*\d+/\d{4}\.?\s+HOURS OF OPERATION\s*$", line):
                    opening_ours.add(p.get_url())
                    print "Found hours:", opening_ours
        try:
            for areaname, coords, meta in find_areas(page):
                if areaname:
                    name = "%s (on page %d of %s)" % (areaname, pagenr + 1, supname)
                else:
                    name = "Area on page %d of %s" % (pagenr + 1, supname)
                print "Number of points", len(coords)
                areas.append(dict(
                    url=full_url,
                    pagenr=pagenr + 1,
                    sup=supname,
                    name=name,
                    type='aip_sup',
                    points=coords))
        except Exception:
            pass
def main(fileObj):
    global_env = make_builtins()
    tokenizer = Tokenizer(PeekableStream(fileObj), False)
    tokenList = tokenizer.read_all()
    tokenString = tokenizer.as_string()
    # print tokenString
    parser = Parser(tokenList)
    code = parser.read_all()
    codeString = parser.as_string()
    # print codeString
    env = Dict(global_env)
    val = code.evaluate(env)
    result = val.call(None, List([]), env)
    if result is not None:
        print result.as_string("")
    else:
        print None
    print "-" * 50
def compile(p, debug=False):
    def print_tokens(tokens):
        for t in tokens:
            print(t)

    lexer = Lexer(p)
    parser = Parser(lexer)
    tokens = parser.parse()
    handler = Handler()
    if debug:
        print_tokens(tokens)

    nfa_stack = []
    for t in tokens:
        handler.handlers[t.name](t, nfa_stack)
    assert len(nfa_stack) == 1
    return nfa_stack.pop()
class Pipeline():

    def __init__(self):
        self.session = requests.Session()
        self.session.headers = {'user-agent': 'shr-podcasts-bot'}
        self.scraper = Scraper(self.session)
        self.parser = Parser(self.session)
        self.storage = Storage()

    def run(self, root, start_page):
        podcasts = (self.parser.parse_feed(feed)
                    for feed in self.scraper.scrape(root, start_page))
        for podcast in filter(None, podcasts):
            self.storage.store_podcast(podcast)
def parse_sig_points():
    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_4_en.pdf")
    points = []
    for pagenr in xrange(p.get_num_pages()):
        #print "Processing page %d"%(pagenr,)
        page = p.parse_page_to_items(pagenr)
        lines = page.get_lines(page.get_all_items(), order_fudge=20)
        for line in lines:
            cols = line.split()
            if len(cols) > 2:
                coordstr = " ".join(cols[1:3])
                #print cols
                if len(mapper.parsecoords(coordstr)) > 0:
                    crd = mapper.parsecoord(coordstr)
                    #print "Found %s: %s"%(cols[0],crd)
                    points.append(dict(
                        name=cols[0],
                        kind='sig. point',
                        pos=crd))

    p = Parser("/AIP/ENR/ENR 4/ES_ENR_4_1_en.pdf")
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        nameheading, = page.get_by_regex(r".*Name of station.*")
        freqheading, = page.get_by_regex(r".*Frequency.*")
        coordheading, = page.get_by_regex(r".*Coordinates.*")
        items = sorted(list(x for x in page.get_partially_in_rect(
                    nameheading.x1, nameheading.y2 + 2, nameheading.x1 + 1, 100)
                if x.text.strip()),
            key=lambda x: x.y1)
        idx = 0
        while True:
            if items[idx].text.strip() == "":
                idx += 1
                continue
            if idx + 1 >= len(items):
                break
            name = items[idx]
            kind = items[idx + 1]
            diffy = kind.y1 - name.y2
            #print "Name, kind:",name,kind
            #print name.text,kind.text,diffy
            assert kind.text.count("VOR") or kind.text.count("DME") or kind.text.count("NDB")
            assert diffy < 0.5
            #print "Frq cnt: <%s>"%(page.get_partially_in_rect(freqheading.x1,name.y1+0.05,freqheading.x2,kind.y2-0.05),)
            freqraw = " ".join(page.get_lines(page.get_partially_in_rect(
                freqheading.x1, name.y1 + 0.05, freqheading.x2, kind.y2 - 0.05)))
            short, freq = re.match(
                r"\s*([A-Z]{2,3})?\s*(\d+(?:\.?\d+)\s+(?:MHz|kHz))\s*(?:H24)?\s*",
                freqraw).groups()
            posraw = " ".join(page.get_lines(page.get_partially_in_rect(
                coordheading.x1, name.y1 + 0.05, coordheading.x2, kind.y2 - 0.05)))
            #print "Rawpos<%s>"%(posraw,)
            pos = mapper.parse_coords(*re.match(
                r".*?(\d+\.\d+[NS]).*?(\d+\.\d+[EW]).*", posraw).groups())
            #print "Name: %s, Shortname: %s, Freq: %s,pos: %s"%(name.text,short,freq,pos)
            points.append(dict(
                name=short + " " + kind.text.strip() + " " + name.text.strip(),
                short=short,
                kind="nav-aid",
                pos=pos,
                freq=freq))
            idx += 2
    return points
def publish(id):
    record = db_session.query(Page).get(id)
    if record is not None:
        parser = Parser()
        html = parser.parse(record.data)
        if record.slug is not None:
            dir_name = record.slug
        else:
            dir_name = record.key
        if record.access == 'public':
            file_path = '%s%s' % (config['generate']['public_path'], dir_name)
            url = dir_name
        elif record.access == 'limited':
            file_path = '%s%s' % (config['generate']['limited_path'], dir_name)
            url = '%s?key=%s' % (dir_name, record.key)
        else:
            # access is private so do nothing
            return jsonify(published=record.published.strftime('%B %d, %Y at %I:%M%p'))
        if not os.path.exists(file_path):
            os.makedirs(file_path)
        full_path = '%s/index.html' % file_path
        with open(full_path, "w") as fh:
            fh.write(html)
        # set published date
        record.published = datetime.now()
        db_session.add(record)
        db_session.commit()
        return jsonify(
            published_date=record.published.strftime('%B %d, %Y at %I:%M%p'),
            url=url
        )
    return jsonify(error=True, message="Not found")
            data = self.data["testing"]["data"][i].T
            labels = self.data["testing"]["labels"][i].T
        else:
            data = np.append(data, self.data["testing"]["data"][i].T, axis=0)
            labels = np.append(labels, self.data["testing"]["labels"][i].T, axis=0)
    return data, labels

def testing(self, c=20):
    k = c / (100 / self.k)
    data, labels = None, None
    for i in range(self.k - 1, self.k - k - 1, -1):
        if data is None and labels is None:
            data = self.data["testing"]["data"][i].T
            labels = self.data["testing"]["labels"][i].T
        else:
            data = np.append(data, self.data["testing"]["data"][i].T, axis=0)
            labels = np.append(labels, self.data["testing"]["labels"][i].T, axis=0)
    return data, labels

if __name__ == "__main__":
    vote_parser = Parser('../data/vote/vote.config', '../data/vote/vote.data')
    vote_parser.parse_config()
    D = vote_parser.parse_data()
    print D.shape
    ucifolder = UCIFolder(D, normalize=False, shuffle=False)
    for c in [5, 10, 15, 20, 30, 50, 80]:
        train_data, train_labels = ucifolder.training(c)
        test_data, test_labels = ucifolder.testing()
        print train_data.shape, test_data.shape
                if i < length - 1:
                    attach[position[i + 1]] += 1
                    i += 1
                else:
                    notend = False
            else:
                cpl.append(phrase_list)
        return cpl

if __name__ == '__main__':
    cc = correction()
    word = raw_input('输入纠正词: ').decode('utf-8')  # prompt: "Enter the word to correct: "
    choose = input('输入选择:1.普通查找 2.精确查找\n')  # prompt: "Choose: 1. normal search  2. exact search"
    if choose == 1:
        pp = Parser()
        phrase_list = pp.normalize(word)
    else:
        if choose == 2:
            i = 0
            phrase_list = []
            while i < len(word):
                phrase_list.append(word[i])
                i += 1
    '''for ii in phrase_list:
        print(ii.encode('utf-8'))'''
    print('________________before correct________________')
    ll = cc.correct(phrase_list)
    if len(ll) == 0:
        print('no correct')
    else:
class searcher:
    # IndexBuilder().index
    # ... = Parser()
    # ....normalize(str)  #['word','word'...]

    # constructor
    def __init__(self):
        self.__invertedindex = IndexBuilder().index
        self.pp = Parser()
        self.pp.normalize("a")
        self.pagerank = []
        with open("urllist", "r") as f1:  # open the urllist file
            self.__num1 = int(f1.readline())  # total number of urls
            self.urllist = []
            n = 0
            while n < self.__num1:  # store the url info
                s = f1.readline()
                arr = s.split(" ")
                # urlid = int(arr[0])  # url ID
                url = arr[1]  # url address
                indegree = int(arr[2])  # url in-degree: used to compute PageRank
                outdegree = int(arr[3])  # url out-degree
                length_of_texts = int(arr[4])
                self.urllist.append([url, indegree, outdegree, length_of_texts])
                n = n + 1
        with open("pagerank", "r") as file:
            for line in file:
                self.pagerank.append(float(line))

    def search_cos(self, query, pagerank=True):
        querydict_tf = {}
        weight = {}
        scoredict = {}
        length = 0
        heap = []
        urlids = []
        self.querylist = self.pp.normalize(query)
        totaldoc = len(self.urllist)
        for item in self.querylist:
            if item in querydict_tf:
                querydict_tf[item] += 1
            else:
                querydict_tf[item] = 1
        for item in querydict_tf.iterkeys():
            if item in self.__invertedindex:
                weight[item] = (1.0 + math.log10(querydict_tf[item])) * math.log10(
                    1.0 * totaldoc / self.__invertedindex[item][0]
                )
            else:
                weight[item] = 0
        i = 0
        while i < self.__num1:
            score = 0
            for item in weight.iterkeys():
                if item in self.__invertedindex and str(i) in self.__invertedindex[item][1]:
                    score += weight[item] * self.__invertedindex[item][1][str(i)][1]
            if pagerank:
                score *= self.pagerank[i]
            uid = id_score(i, score)
            if uid.score > 0:
                if len(heap) <= 50:
                    heapq.heappush(heap, uid)
                else:
                    heapq.heappushpop(heap, uid)
            i += 1
        # output
        while len(heap) > 0:
            tmp = heapq.heappop(heap).urlid
            urlids.append(tmp)
        urlids.reverse()
        return urlids

    # boolean search
    def boolean(self, query):
        query = self.pp.normalize(query)  # parse the query
        # character = []
        # for term in query:
        #     print type(term)
        #     query.append(term)
        character_set = list(set(query))  # deduplicate
        # sort terms by the size of their posting lists
        # character_set = []
        # for term in character:
        #     T = (term, len(self.__invertedindex[term][1]))
        #     character_set.append(T)
        # character_set.sort(lambda x, y: cmp(x[1], y[1]))
        # get the posting list of the first term
        finalindex = self.__invertedindex.get(character_set[0], [0, {}, 0])[1].keys()
        for term in character_set:
            if finalindex:
                index = self.__invertedindex.get(term, [0, {}, 0])[1].keys()  # posting list of the i-th term
                finalindex = list(set(finalindex) & set(index))
            else:
                return finalindex
        heap = []
        for url in finalindex:
            score = 0
            for term in character_set:
                score = score + self.__invertedindex.get(term, [0, {}, 0])[1][url][0]
            heap.append(id_score(int(url), score))
        heapq.heapify(heap)
        urlids = []
        while len(heap) > 0:
            tmp = heapq.heappop(heap).urlid
            urlids.append(tmp)
        urlids.reverse()
        return urlids

def gettitle(url):
    try:
        req_header = {
            "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6"
        }
        req = urllib2.Request(url, None, req_header)
        page = urllib2.urlopen(req, None, 54)
        html = page.read()
        page.close()
        soup = BeautifulSoup(html)
        title = soup.title
        title = title.string
    except Exception as e:
        print e
        title = None
    return title
flt = Tok(r'[\+\-]?(\d+\.)|(\d*\.\d+)') < Float
int_ = Tok(r'[\+\-]?\d+') < Int
nam = Tok(r'\w+') < Name
var = nam
plus = Tok(r'\+')
dash = Tok(r'\-')
star = Tok(r'\*')
slash = Tok(r'\/')
mod = Tok(r'\%')
dstar = Tok(r'\*\*')
opar = Tok(r'\(')
cpar = Tok(r'\)')
equal = Tok(r'\=')
kw_print = Tok(r'print')

exprs = Parser()
exprl = Parser()
expr = Parser()
prim = Parser()
expo = Parser()
sign = Parser()
fact = Parser()
summ = Parser()
asgn = Parser()
prin = Parser()

prim.parser = flt | int_ | var | opar + expr - cpar
expo.parser = (
    (prim - dstar & expo < Pow)
    | prim
)
def call(self, vm, arg):
    from parse import Parser, ParseError
    return Parser.parse_line(vm, arg)
def test_parse_user_ids(self):
    parser = Parser()
    parser.fft_orders = self.order_hash
    self.assertEqual([7, 8, 9, 1], parser.user_ids_from_orders(parser.fft_orders))
end = Tok(r'$') < (lambda _: None)
num = Tok(r'[\+\-]?(\d+\.?)|(\d*\.\d+)') < float
nam = Tok(r'\w+')
var = nam < (lambda x: env[x])
plus = Tok(r'\+')
dash = Tok(r'\-')
star = Tok(r'\*')
slash = Tok(r'\/')
mod = Tok(r'\%')
dstar = Tok(r'\*\*')
opar = Tok(r'\(')
cpar = Tok(r'\)')
equal = Tok(r'\=')
rr = Tok(r'\>\>')

exprs = Parser()
expr = Parser()
prim = Parser()
expo = Parser()
sign = Parser()
fact = Parser()
summ = Parser()
asgn = Parser()
prin = Parser()

prim.parser = num | var | opar + expr - cpar
expo.parser = (
    (prim - dstar & expo < (lambda a, b: a ** b))
    | prim
)
sign.parser = (
            code = parser.run(buf)
            secd.c = [code]
            secd.update()
            secd.clear()
            buf = []
            i += 1
            print
            print 'secd %d :> ' % i,
        else:
            buf.append(line)
            i += 1
            print
            print 'secd %d :> ' % i,

if __name__ == '__main__':
    secd = SECDMachine()
    parser = Parser()
    if len(sys.argv) == 2:
        print 'run script:', sys.argv[1]
        code = parser.run_from_file(sys.argv[1])
        secd.c = [code]
        print secd
        secd.update()
    else:
        print 'dialog mode'
        dialog(secd, parser)
if __name__ == "__main__":
    import getopt, sys
    opts, args = getopt.getopt(sys.argv[1:], "s")
    use_stemming = False
    for o, a in opts:
        if o in ('-s', '--stemming'):
            use_stemming = True
    print 'use stemming: %d' % use_stemming

    filelist = [(path + f) for f in os.listdir(path)]
    parser = Parser(fstopname)

    for stem in [use_stemming, ]:
        for idf in True, False:
            print 'Parsing files...',
            stdout.flush()
            parser.parse(filelist[:10000], stem)
            # Ignore the 30% least and most frequent words
            parser.words = slice_sorted_words(parser.words, 30)
            print 'done'

            print 'Normalizing frequencies...',
            stdout.flush()
            # Don't modify the original set
            for i, doc in enumerate(parser.docset):
                normalize(doc, parser.words, idf)
                print i
def __init__(self, config_file, data_file):
    vote_parser = Parser(config_file, data_file)
    vote_parser.parse_config()
    self.D = vote_parser.parse_data()
def test_evaluator():
    from parse import Parser
    from lex import Tokenizer, mockfile
    from function import make_builtins
    global_env = make_builtins()
    test_list = """
# NEXT
4;
# NEXT
def x = 4;
# NEXT
def x = 4; x;
# NEXT
def x = ["a", "b"]; x;
# NEXT
def x = ["a", "b"]; x[0];
# NEXT
["a", "b"][0];
# NEXT
{4;}[];
# NEXT
![x]{x;};
# NEXT
![x]{x;}[5];
# NEXT
def x = 6;
def ret4 = ![x]{ set x = 4; x; };
set x = 9;
ret4[x];
x;
# NEXT
def a = 1; def b = (); def c = ( d=3);
# NEXT
4.str;
# NEXT
4.str[];
# NEXT
4.add[2];
# NEXT
def add4 = { ![y]{4.add[y]} };
add4[][1];
# NEXT
def addN = ![n]{ ![y]{n.add[y]} };
var add2 = addN[2];
add2[9];
# NEXT
true;
# NEXT
true.ifTrue[{4},{5}];
# NEXT
false.ifTrue[{4},{5}];
# NEXT
File;
# NEXT
File["../README.md"];
# NEXT
def x = File["../README.md"];
x.read[1];
var y = x.read[5];
x.close[];
y;
# NEXT
def x = ( y=4 );
def n = { inc x; y };
n[];
# NEXT
def x = { def y = 4; Frame[]; };
x[];
# NEXT
def Statement = {
    var kind;
    def str = {Error["not implemented"]};
    def call = {Error["not implemented"]};
    def eval = {Error["not implemented"]};
    def get = {Error["not implemented"]};
    Frame[];
}[];
Print["Statement = "];
Print[Statement];
def Var = {
    inc Statement;
    set kind = "var";
    var name;
    var chain;
    Frame[];
}[];
Print["Var = "];
Print[Var];
4;
""".split("# NEXT")

    print "-" * 50
    for inputString in test_list:
        print inputString.strip()
        tokenizer = Tokenizer(mockfile(inputString), False)
        tokenList = tokenizer.read_all()
        tokenString = tokenizer.as_string()
        print tokenString
        parser = Parser(tokenList)
        code = parser.read_all()
        codeString = parser.as_string()
        print codeString
        env = Dict(global_env)
        val = code.evaluate(env)
        result = val.call(None, List([]), env)
        if result is not None:
            print result.as_string("")
        else:
            print None
        print "-" * 50
    tab = 0
    for i in range(len(para)):
        sys.stdout.write(para[i])
        if para[i] == "(":
            tab += 1
        elif para[i] == ")":
            tab -= 1
        if (i + 1 < len(para) and para[i + 1] != ')'):
            sys.stdout.write('\n' + tab * '\t')

if __name__ == '__main__':
    if (len(sys.argv) < 2):
        print "Error"
    else:
        file = sys.argv[1]
        sentenceFile = sys.argv[2]
        gram = parseGrammar(file)
        p = Parser(gram)
        p.parseSentenceFile(sentenceFile, gram)
        if ("-t" in sys.argv):
            output = prettyPrint('ROOT', "", gram)
            print output
            #formatPretty(output)
            #prettyprint here
def extract_airfields(filtericao=lambda x: True, purge=True):
    # print getxml("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    ads = []
    p = Parser("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    points = dict()
    startpage = None
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        if page.count("Aerodrome directory"):
            startpage = pagenr
            break
    if startpage == None:
        raise Exception("Couldn't find aerodrome directory in file")
    # print "Startpage: %d"%(startpage,)
    # nochartf=open("nochart.txt","w")
    for pagenr in xrange(startpage, p.get_num_pages()):
        row_y = []
        page = p.parse_page_to_items(pagenr)
        allines = [x for x in (page.get_lines(page.get_partially_in_rect(0, 0, 15, 100))) if x.strip()]
        for item, next in zip(allines, allines[1:] + [""]):
            # print "item:",item
            m = re.match(ur"^\s*[A-ZÅÄÖ]{3,}(?:/.*)?\b.*", item)
            if m:
                # print "Candidate, next is:",next
                if re.match(r"^\s*[A-Z]{4}\b.*", next):
                    # print "Matched:",item
                    # print "y1:",item.y1
                    row_y.append(item.y1)
        for y1, y2 in zip(row_y, row_y[1:] + [100.0]):
            # print "Extacting from y-range: %f-%f"%(y1,y2)
            items = list(page.get_partially_in_rect(0, y1 - 0.25, 5.0, y2 + 0.25, ysort=True))
            if len(items) >= 2:
                # print "Extract items",items
                ad = dict(name=unicode(items[0].text).strip(),
                          icao=unicode(items[1].text).strip())
                # print "Icao:",ad['icao']
                assert re.match(r"[A-Z]{4}", ad["icao"])
                if not filtericao(ad):
                    continue
                if len(items) >= 3:
                    # print "Coord?:",items[2].text
                    m = re.match(r".*(\d{6}N)\s*(\d{7}E).*", items[2].text)
                    if m:
                        lat, lon = m.groups()
                        ad["pos"] = parse_coords(lat, lon)
                        # print "Items3:",items[3:]
                        elev = re.findall(r"(\d{1,5})\s*ft", " ".join(t.text for t in items[3:]))
                        # print "Elev:",elev
                        assert len(elev) == 1
                        ad["elev"] = int(elev[0])
                ads.append(ad)
    big_ad = set()
    for ad in ads:
        if not ad.has_key("pos"):
            big_ad.add(ad["icao"])
    for ad in ads:
        icao = ad["icao"]
        if icao in big_ad:
            if icao in ["ESIB", "ESNY", "ESCM", "ESPE"]:
                continue
            try:
                p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf" % (icao, icao))
            except:
                p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf" % (icao, icao))
            ad["aipvacurl"] = p.get_url()
            for pagenr in xrange(p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                """
                for altline in exitlines:
                    m=re.match(r"(\w+)\s+(\d+N)\s*(\d+E.*)",altline)
                    if not m: continue
                    name,lat,lon=m.groups()
                    try:
                        coord=parse_coords(lat,lon)
                    except Exception:
                        continue
                    points.append(dict(name=name,pos=coord))
                """
                for kind in xrange(2):
                    if kind == 0:
                        hits = page.get_by_regex(r"H[Oo][Ll][Dd][Ii][Nn][Gg]")
                        kind = "holding point"
                    if kind == 1:
                        hits = page.get_by_regex(r"[Ee]ntry.*[Ee]xit.*point")
                        kind = "entry/exit point"
                    if len(hits) == 0:
                        continue
                    for holdingheading in hits:
                        items = sorted(
                            page.get_partially_in_rect(
                                holdingheading.x1 + 2.0, holdingheading.y2 + 0.1,
                                holdingheading.x1 + 0.5, 100),
                            key=lambda x: x.y1,
                        )
                        items = [x for x in items if not x.text.startswith(" ")]
                        # print "Holding items:",items
                        for idx, item in enumerate(items):
                            print "Holding item", item
                            y1 = item.y1
                            if idx == len(items) - 1:
                                y2 = 100
                            else:
                                y2 = items[idx + 1].y1
                            items2 = [
                                x for x in page.get_partially_in_rect(item.x1 + 1, y1 + 0.3, item.x1 + 40, y2 - 0.1)
                                if x.x1 >= item.x1 - 0.25 and x.y1 >= y1 - 0.05 and x.y1 < y2 - 0.05
                            ]
                            s = (" ".join(page.get_lines(items2))).strip()
                            print "Holding lines:", repr(page.get_lines(items2))
                            # if s.startswith("ft Left/3"):  # Special case for ESOK
                            #     s,=re.match("ft Left/3.*?([A-Z]{4,}.*)",s).groups()
                            # m=re.match("ft Left/\d+.*?([A-Z]{4,}.*)",s)
                            # if m:
                            #     s,=m.groups()
                            if s.startswith("LjUNG"):  # Really strange problem with ESCF
                                s = s[0] + "J" + s[2:]
                            if s.lower().startswith("holding"):
                                sl = s.split(" ", 1)
                                if len(sl) > 1:
                                    s = sl[1]
                            s = s.strip()
                            if kind == "entry/exit point" and s.startswith("HOLDING"):
                                continue  # reached HOLDING-part of VAC
                            # Check for other headings
                            # Fixup strange formatting of points in some holding items: (whitespace between coord and 'E')
                            s = re.sub(ur"(\d+)\s*(N)\s*(\d+)\s*(E)", lambda x: "".join(x.groups()), s)
                            m = re.match(r"([A-Z]{2,}).*?(\d+N)\s*(\d+E).*", s)
                            if not m:
                                m = re.match(r".*?(\d+N)\s*(\d+E).*", s)
                                if not m:
                                    continue
                                assert m
                                lat, lon = m.groups()
                                # skavsta
                                if icao == "ESKN":
                                    if s.startswith(u"Hold north of T"):
                                        name = "NORTH"
                                    elif s.startswith(u"Hold south of B"):
                                        name = "SOUTH"
                                    else:
                                        assert 0
                                # add more specials here
                                else:
                                    continue
                            else:
                                name, lat, lon = m.groups()
                            try:
                                coord = parse_coords(lat, lon)
                            except Exception:
                                print "Couldn't parse:", lat, lon
                                continue
                            # print name,lat,lon,mapper.format_lfv(*mapper.from_str(coord))
                            if name.count("REMARK") or len(name) <= 2:
                                print "Suspicious name: ", name
                                # sys.exit(1)
                                continue
                            points[icao + " " + name] = dict(name=icao + " " + name, icao=icao, pos=coord, kind=kind)

    # for point in points.items():
    #     print point
    # sys.exit(1)

    def fixhex11(s):
        out = []
        for c in s:
            i = ord(c)
            if i >= 0x20:
                out.append(c)
                continue
            if i in [0x9, 0xA, 0xD]:
                out.append(c)
                continue
            out.append(" ")
        return "".join(out)

    for ad in ads:
        icao = ad["icao"]
        if icao in big_ad:
            # print "Parsing ",icao
            p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf" % (icao, icao), loadhook=fixhex11)
            ad["aiptexturl"] = p.get_url()
            firstpage = p.parse_page_to_items(0)
            te = "\n".join(firstpage.get_all_lines())
            # print te
            coords = re.findall(r"ARP.*(\d{6}N)\s*(\d{7}E)", te)
            if len(coords) > 1:
                raise Exception(
                    "First page of airport info (%s) does not contain exactly ONE set of coordinates" % (icao,))
            if len(coords) == 0:
                print "Couldn't find coords for ", icao
            # print "Coords:",coords
            ad["pos"] = parse_coords(*coords[0])

            elev = re.findall(r"Elevation.*?(\d{1,5})\s*ft", te, re.DOTALL)
            if len(elev) > 1:
                raise Exception(
                    "First page of airport info (%s) does not contain exactly ONE elevation in ft" % (icao,))
            if len(elev) == 0:
                print "Couldn't find elev for ", icao
            ad["elev"] = int(elev[0])
            freqs = []
            found = False
            thrs = []
            # uprint("-------------------------------------")
            for pagenr in xrange(p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                # uprint("Looking on page %d"%(pagenr,))
                if (0):  # opening hours are no longer stored in a separate document for any airports. No need to detect which any more (since none are).
                    for item in page.get_by_regex(".*OPERATIONAL HOURS.*"):
                        lines = page.get_lines(page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))
                        for line in lines:
                            things = ["ATS", "Fuelling", "Operating"]
                            if not line.count("AIP SUP"):
                                continue
                            for thing in things:
                                if line.count(thing):
                                    ad["aipsup"] = True

                for item in page.get_by_regex(".*\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*.*"):
                    # uprint("Physical char on page")
                    lines = page.get_lines(page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))
                    seen_end_rwy_text = False
                    for line, nextline in izip(lines, lines[1:] + [None]):
                        # uprint("MAtching: <%s>"%(line,))
                        if re.match(ur"AD\s+2.13", line):
                            break
                        if line.count("Slope of"):
                            break
                        if line.lower().count("end rwy:"):
                            seen_end_rwy_text = True
                        if line.lower().count("bgn rwy:"):
                            seen_end_rwy_text = True
                        m = re.match(ur".*(\d{6}\.\d+)[\s\(\)\*]*(N).*", line)
                        if not m:
                            continue
                        m2 = re.match(ur".*(\d{6,7}\.\d+)\s*[\s\(\)\*]*(E).*", nextline)
                        if not m2:
                            continue
                        latd, n = m.groups()
                        lond, e = m2.groups()
                        assert n == "N"
                        assert e == "E"
                        lat = latd + n
                        lon = lond + e
                        rwytxts = page.get_lines(page.get_partially_in_rect(0, line.y1 + 0.05, 12, nextline.y2 - 0.05))
                        uprint("Rwytxts:", rwytxts)
                        rwy = None
                        for rwytxt in rwytxts:
                            # uprint("lat,lon:%s,%s"%(lat,lon))
                            # uprint("rwytext:",rwytxt)
                            m = re.match(ur"\s*(\d{2}[LRCM]?)\b.*", rwytxt)
                            if m:
                                assert rwy == None
                                rwy = m.groups()[0]
                        if rwy == None and seen_end_rwy_text:
                            continue
                        print "Cur airport:", icao
                        already = False
                        assert rwy != None
                        seen_end_rwy_text = False
                        for thr in thrs:
                            if thr["thr"] == rwy:
                                raise Exception("Same runway twice on airfield:" + icao)
                        thrs.append(dict(pos=mapper.parse_coords(lat, lon), thr=rwy))
            assert len(thrs) >= 2

            for pagenr in xrange(0, p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                matches = page.get_by_regex(r".*ATS\s+COMMUNICATION\s+FACILITIES.*")
                # print "Matches of ATS COMMUNICATION FACILITIES on page %d: %s"%(pagenr,matches)
                if len(matches) > 0:
                    commitem = matches[0]
                    curname = None
                    callsign = page.get_by_regex_in_rect(ur"Call\s*sign", 0, commitem.y1, 100, commitem.y2 + 8)[0]
                    for idx, item in enumerate(
                            page.get_lines(
                                page.get_partially_in_rect(callsign.x1 - 0.5, commitem.y1, 100, 100),
                                fudge=0.3, order_fudge=15)):
                        if item.strip() == "":
                            curname = None
                        if re.match(".*RADIO\s+NAVIGATION\s+AND\s+LANDING\s+AIDS.*", item):
                            break
                        # print "Matching:",item
                        m = re.match(r"(.*?)\s*(\d{3}\.\d{1,3})\s*MHz.*", item)
                        # print "MHZ-match:",m
                        if not m:
                            continue
                        # print "MHZ-match:",m.groups()
                        who, sfreq = m.groups()
                        freq = float(sfreq)
                        if abs(freq - 121.5) < 1e-4:
                            if who.strip():
                                curname = who
                            continue  # Ignore emergency frequency, it is understood
                        if not who.strip():
                            if curname == None:
                                continue
                        else:
                            curname = who
                        freqs.append((curname.strip().rstrip("/"), freq))

            for pagenr in xrange(0, p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                matches = page.get_by_regex(r".*ATS\s*AIRSPACE.*")
                # print "Matches of ATS_AIRSPACE on page %d: %s"%(pagenr,matches)
                if len(matches) > 0:
                    heading = matches[0]
                    desigitem, = page.get_by_regex("Designation and lateral limits")
                    vertitem, = page.get_by_regex("Vertical limits")
                    airspaceclass, = page.get_by_regex("Airspace classification")
                    lastname = None
                    subspacelines = dict()
                    subspacealts = dict()
                    for idx, item in enumerate(
                            page.get_lines(page.get_partially_in_rect(desigitem.x2 + 1, desigitem.y1, 100, vertitem.y1 - 1))):
                        if item.count("ATS airspace not established"):
                            assert idx == 0
                            break
                        if item.strip() == "":
                            continue
                        m = re.match(r"(.*?)(\d{6}N\s+.*)", item)
                        if m:
                            name, coords = m.groups()
                            name = name.strip()
                        else:
                            name = item.strip()
                            coords = None
                        if name:
                            lastname = name
                        if coords:
                            subspacelines.setdefault(lastname, []).append(coords)
                        assert lastname
                    lastname = None

                    # print "Spaces:",subspacelines
                    # print "ICAO",ad['icao']
                    # altlines=page.get_lines(page.get_partially_in_rect(vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2))
                    # print "Altlines:",altlines
                    subspacealts = dict()
                    subspacekeys = subspacelines.keys()
                    allaltlines = " ".join(
                        page.get_lines(
                            page.get_partially_in_rect(
                                vertitem.x1 + 0.5, vertitem.y1 + 0.5, 100, airspaceclass.y1 - 0.2)))
                    single_vertlim = False
                    totalts = list(mapper.parse_all_alts(allaltlines))
                    # print "totalts:",totalts
                    if len(totalts) == 2:
                        single_vertlim = True
                    for subspacename in subspacekeys:
                        ceil = None
                        floor = None
                        subnames = [subspacename]
                        if subspacename.split(" ")[-1].strip() in ["TIA", "TIZ", "CTR", "CTR/TIZ"]:
                            subnames.append(subspacename.split(" ")[-1].strip())
                        # print "Parsing alts for ",subspacename,subnames
                        try:
                            for nametry in subnames:
                                if single_vertlim:
                                    # there's only one subspace, parse all of vertical limits field for this single one.
                                    items = [vertitem]
                                else:
                                    items = page.get_by_regex_in_rect(
                                        nametry, vertitem.x2 + 1, vertitem.y1, 100, airspaceclass.y1 - 0.2)
                                for item in items:
                                    alts = []
                                    for line in page.get_lines(
                                            page.get_partially_in_rect(
                                                item.x1 + 0.5, item.y1 + 0.5, 100, airspaceclass.y1 - 0.2)):
                                        # print "Parsing:",line
                                        line = line.replace(nametry, "").lower().strip()
                                        parsed = list(mapper.parse_all_alts(line))
                                        if len(parsed):
                                            alts.append(mapper.altformat(*parsed[0]))
                                        if len(alts) == 2:
                                            break
                                    if alts:
                                        # print "alts:",alts
                                        ceil, floor = alts
                                        raise StopIteration
                        except StopIteration:
                            pass
                        assert ceil and floor
                        subspacealts[subspacename] = dict(ceil=ceil, floor=floor)
                    spaces = []
                    for spacename in subspacelines.keys():
                        altspacename = spacename
                        # print "Altspacename: %s, subspacesalts: %s"%(altspacename,subspacealts)
                        space = dict(
                            name=spacename,
                            ceil=subspacealts[altspacename]["ceil"],
                            floor=subspacealts[altspacename]["floor"],
                            points=parse_coord_str(" ".join(subspacelines[spacename])),
                            freqs=list(set(freqs)))
                        if True:
                            vs = []
                            for p in space["points"]:
                                x, y = mapper.latlon2merc(mapper.from_str(p), 13)
                                vs.append(Vertex(int(x), int(y)))
                            p = Polygon(vvector(vs))
                            if p.calc_area() <= 30 * 30:
                                pass  # print space
                                pass  # print "Area:",p.calc_area()
                            assert p.calc_area() > 30 * 30
                            # print "Area: %f"%(p.calc_area(),)
                        spaces.append(space)
                        # print space
                    ad["spaces"] = spaces
                    found = True
                if found:
                    break
            assert found
            ad["runways"] = rwy_constructor.get_rwys(thrs)
from pascal_loader.main_io import PascalFile
from parse import Parser
from emulator import Emulator

if __name__ == '__main__':
    pretty_printer = pprint.PrettyPrinter()
    # UNCOMMENT the below statements one at a time
    # tokens = get_token(PascalFile(input_file_location='simple_assignment.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='complex_assignments.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='control_repeat.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='control_while.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='control_if.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='control_for.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='case_statement.pas', output_location=''))
    tokens = get_token(PascalFile(input_file_location='arrays.pas', output_location=''))
    # This prints tokens, uncomment to see the generated tokens
    # pretty_printer.pprint(tokens)
    print '----------------------------------'
    # setting verbose=True on the parser will print to console as tokens are matched/warnings
    # parser = Parser(token_list=tokens, verbose=True)
    parser = Parser(token_list=tokens)
    byte_array = parser.parse()
    # This prints the byte array, uncomment to see the bytearray
    # pretty_printer.pprint(byte_array)
    print '----------------------------------'
    emulator = Emulator(byte_array)
    emulator.start()