def parse_mountain_area():
    """Extract the Swedish "Mountainous Area" polygon from AIP ENR 1.1.

    Scans every page of the PDF, normalises the text, and pulls the
    coordinate list between the "lateral limits" heading and the AIRAC
    note.  Returns a one-element list with a dict (name, GND/UNL limits,
    polygon points, type, empty freqs).  Python 2 code (print statement,
    xrange, ur-literals).
    """
    p = Parser("/AIP/ENR/ENR%201/ES_ENR_1_1_en.pdf")
    #alongborder="610213N 0114917E - 632701N 0114917E - 661457N 0141140E - 682200N 0173441E - 683923N 0183004E - 683141N 0194631E - 690945N 0202604E - 683533N 0221411E - 680424N 0233833E - 670159N 0240734E - 663602N 0240455E - "
    areas = []
    for pagenr in xrange(p.get_num_pages()):
        #print "Processing page %d"%(pagenr,)
        page = p.parse_page_to_items(pagenr)
        lines = page.get_lines(page.get_all_items())
        allofit = " ".join(lines)

        # Normalise border phrasing and en-dashes so the regex below matches.
        allofit = allofit.replace(
            u"along the Swedish/Norwegian and Swedish/Finnish border to",
            u"Along the common X/Y state boundary to")
        allofit = allofit.replace(u"–", "-")

        coordarea = re.match(
            ur".*Mountainous\s+area\s+of\s+Sweden.{1,10}lateral\s+limits(.*?)AIRAC.*",
            allofit)
        if coordarea:
            points = []
            txt, = coordarea.groups()
            print "area:<", txt, ">"
            points = mapper.parse_coord_str(txt, context="sweden")
            assert (len(points) > 3)
            print "Point:", len(points)
            areas.append(
                dict(name="Mountainous Area",
                     floor="GND",
                     ceiling="UNL",
                     points=points,
                     type="mountainarea",
                     freqs=[]))
    print len(areas)
    # The document is expected to define exactly one such area.
    assert len(areas) == 1
    return areas
Example #2
0
    def execute(self):
        """Run the post-processing pipeline for the current run.

        Collects CLI stats, cleans up docker, extracts this run's slice of
        the log file, parses results, gathers general information, records
        step timings as CSV, and builds the final report.
        """
        self._pool = Pool(config.pool_processors)
        self._thread_pool = ThreadPool(5)

        cli_stats = CliStats(self._context, self._writer)
        cli_stats.execute()

        self.clean_up_docker()

        # Mark the run's end in the main log, flush, then copy the lines
        # between the start/end markers into the per-run log file.
        logging.info(config.log_line_run_end + self._context.run_name)
        _flush_log_handlers()
        _extract_from_file(config.log_file, config.run_log,
                           config.log_line_run_start + self._context.run_name,
                           config.log_line_run_end + self._context.run_name)

        parser = Parser(self._context, self._writer)
        parser.execute()

        _collect_general_information()

        # Persist per-step timings for this run.
        self._context.step_times.append(
            StepTimes(time.time(), 'postprocessing_end'))
        self._writer.write_csv(config.step_times_csv_file_name,
                               StepTimes.csv_header, self._context.step_times)

        _create_report()

        self._pool.close()
        self._thread_pool.close()
        logging.info('Executed post processing')
def parse_mountain_area():
    """Extract the Swedish "Mountainous Area" polygon from AIP ENR 1.1.

    Duplicate (unformatted) copy of the same routine earlier in this file.
    Returns a one-element list with a dict describing the area.  Python 2
    code (print statement, xrange, ur-literals).
    """
    p=Parser("/AIP/ENR/ENR%201/ES_ENR_1_1_en.pdf")
    #alongborder="610213N 0114917E - 632701N 0114917E - 661457N 0141140E - 682200N 0173441E - 683923N 0183004E - 683141N 0194631E - 690945N 0202604E - 683533N 0221411E - 680424N 0233833E - 670159N 0240734E - 663602N 0240455E - "
    areas=[]
    for pagenr in xrange(p.get_num_pages()):
        #print "Processing page %d"%(pagenr,)
        page=p.parse_page_to_items(pagenr)
        lines=page.get_lines(page.get_all_items())
        allofit=" ".join(lines)
        
        # Normalise border phrasing and en-dashes so the regex below matches.
        allofit=allofit.replace(u"along the Swedish/Norwegian and Swedish/Finnish border to",
                                    u"Along the common X/Y state boundary to"                                
                                )
        allofit=allofit.replace(u"–","-")
        
        coordarea=re.match(ur".*Mountainous\s+area\s+of\s+Sweden.{1,10}lateral\s+limits(.*?)AIRAC.*",allofit)
        if coordarea:
            points=[]
            txt,=coordarea.groups()
            print "area:<",txt,">"
            points=mapper.parse_coord_str(txt,context="sweden")
            assert(len(points)>3)
            print "Point:",len(points)
            areas.append(dict(
                    name="Mountainous Area",
                    floor="GND",
                    ceiling="UNL",
                    points=points,
                    type="mountainarea",
                    freqs=[]))
    print len(areas)
    # The document is expected to define exactly one such area.
    assert len(areas)==1
    return areas     
Example #4
0
    def run_parser(self, file_path):
        """Parse the network definition found at *file_path*.

        Rebuilds every simulator module from scratch, resets the cycle
        counter, and logs whether parsing succeeded.  Anything that
        parse_network() prints is captured and, on failure, echoed into
        the activity log in a monospace font.
        """
        # Start from a clean slate before re-parsing.
        self.cycles_completed = 0
        self.names = Names()
        self.devices = Devices(self.names)
        self.network = Network(self.names, self.devices)
        self.monitors = Monitors(self.names, self.devices, self.network)
        self.scanner = Scanner(file_path, self.names)
        self.parser = Parser(self.names, self.devices, self.network,
                             self.monitors, self.scanner)

        # parse_network() reports its errors on stdout; capture them.
        buffer = io.StringIO()
        with redirect_stdout(buffer):
            ok = self.parser.parse_network()
            self.parse_success = ok
            if ok:
                self.log_message(_("Succesfully parsed network."))
            else:
                self.log_message(_("Failed to parse network."))
                # Surface the captured error messages in the activity log.
                self.log_message(buffer.getvalue(), self.MONOSPACE_FONT)
Example #5
0
 def yield_parse_result(line):
     """Rewrite an explicit std::operator call back into operator syntax.

     Returns the rewritten line, or None when the line does not match a
     known stream operator call.
     """
     template = Parser('{indent}std::operator{op}({params});')
     result = template.parse(line)
     if result and result.named['op'] in STREAM_OPERATORS:
         named = result.named
         glue = ' ' + named['op']
         return named['indent'] + glue.join(named['params'].split(',')) + ';'
Example #6
0
def run():
    """Lex and parse the source file named by the first CLI argument."""
    source = open_file(argv[1])
    tokens = Lexer(source).lex()
    Parser(tokens).parse()
Example #7
0
def main():
    """Scrape proxies from hidemy.name, verify them, and dump them to CSV.

    CLI: <url> <url_for_proxy_check> <output_path> as the last three args.
    """
    url = str(sys.argv[-3])
    url_for_proxy_check = str(sys.argv[-2])
    path = sys.argv[-1]

    browser = webdriver.Chrome()
    browser.get(url)
    hide = HidemyName(browser)

    scraped = hide.main()
    extract = Parser(scraped, path)
    result = extract.excute()
    # result looks like:
    #   {"address": ["1.1.1.1", "2.2.2.2"], "port": ["3128", "8080"], ...}
    # so pairing address with port yields ["1.1.1.1:3128", "2.2.2.2:8080"].
    proxy_list = ["%s:%s" % pair
                  for pair in zip(result['address'], result['port'])]

    print("CHECING PROXIES FOR %s ".center(80, "#") % url_for_proxy_check)
    final = check(url_for_proxy_check, proxy_list)
    df = pd.DataFrame(final).to_csv(path, index=False)
    print("Done".center(80, "-"))
Example #8
0
 def yield_parse_result(line1, line2):
     """Inline a one-use `vNAME = value;` binding from line1 into line2.

     Only fires when line2 references the bound variable and contains a
     stream operator; returns ('', rewritten_line2) in that case.
     """
     binding = Parser('{}v{var_name} = {value};')
     result = binding.parse(line1)
     if result and 'v' + result.named['var_name'] in line2 and any(
             op in line2 for op in STREAM_OPERATORS):
         name = result.named['var_name']
         return '', line2.replace('v' + name, result.named['value'])
Example #9
0
def evaluate(expression: str, is_rad: bool, is_binary: bool) -> str:
    """Evaluate a mathematical expression string and return the result.

    Args:
        expression: Expression to evaluate.
        is_rad: True to evaluate trigonometry in radian mode.
        is_binary: True if the input and output are in binary.

    Returns:
        The result of the evaluation as a string; on failure, the
        exception message instead.
    """
    if exp_is_blank(expression):
        return ""

    parser = Parser(is_rad, is_binary)
    try:
        # Make implicit multiplications between bracketed items explicit.
        # Fix: raw strings, so `\d` / `\)` are regex escapes rather than
        # invalid string escapes (a SyntaxWarning on modern Python).
        expression = re.sub(r'(?<=\d|\))(\()', '*(', expression)
        # Map Euler's constant to E when not surrounded by other letters,
        # so the parser can read it.
        expression = re.sub(r'(?![a-zA-Z])e(?![a-zA-Z])', 'E', expression)
        expression = expression.replace('π', 'PI')
        expression = expression.replace('√', 'sqrt')

        # Evaluate expression.
        evaluation = parser.evaluate(expression)
        if is_binary:
            evaluation = display.decimal_to_binary(evaluation)
        return evaluation
    except Exception as e:
        return str(e)
Example #10
0
 def type(self, string):
     '''Check whether the wrapped parameter matches the named type.

     For 'int' and 'float' the parameter is first coerced in place via
     Parser.int / Parser.float when possible, so e.g. "3" passes 'int'
     and self.__param becomes the int 3 afterwards.  Python 2 code
     (uses the removed `types.StringType` family).

     @param string:
         'string' | 'int' | 'float' | 'bool' | 'dict' | 'array'
     @return: True when the (possibly coerced) parameter has that type.
     '''
     tp = type(self.__param)
     
     if string == 'string':
         return tp == types.StringType
     elif string == 'int':
         # Try coercion; keep the coerced value on success.
         tmp = Parser.int(self.__param, None)
         if tmp != None:
             self.__param = tmp
         tp = type(self.__param)
         return tp == types.IntType
     elif string == 'float':
         # Try coercion; keep the coerced value on success.
         tmp = Parser.float(self.__param, None)
         if tmp != None:
             self.__param = tmp
         tp = type(self.__param)
         return tp == types.FloatType
     elif string == 'bool':
         return tp == types.BooleanType
     elif string == 'dict':
         return tp == types.DictionaryType
     elif string == 'array':
         return tp == types.ListType
     return False
Example #11
0
    def solve(self, expr: str = ""):
        """Solve a cryptarithm problem given as an expression string."""
        print("Problem: {}".format(expr))

        problem = Problem(Parser(tokenize(expr)).parse())
        print(problem.search_all_solution())
Example #12
0
 def test_magnesium_hydroxide(self):
     """Mg(OH)2 parses into its per-element atom counts."""
     expected = {'H': 2, 'Mg': 1, 'O': 2}
     self.assertEqual(Parser().parse_molecule(magnesium_hydroxide), expected)
Example #13
0
class Crawler():
    """Fetch a single page for the given params and parse its fields."""

    def __init__(self, params):
        self.params = params
        self.log = {"fetching": None, "crawling": None}
        self.results = None
        # Fix: previously undefined until get_all() succeeded, so
        # get_pandas_df() raised AttributeError on a fresh instance.
        self.results_df = None
        self._request = None
        self._parser = None

    def get_all(self):
        """Fetch the page and, on success, parse it into results/results_df."""
        self._request = RequestSinglePage(params=self.params)
        self._request.get()

        self.log["fetching"] = self._request.log

        if not self._request.log.successful:
            self.results = None
            return None

        self._parser = Parser(self._request.page_content)
        self._parser.extract_fields()
        self.results = self._parser.results
        self.log["crawling"] = self._parser._log
        self.results_df = pd.DataFrame(self.results)

    def get_pandas_df(self):
        """Return the parsed results as a DataFrame (None before get_all)."""
        return self.results_df
Example #14
0
    def test_numbers(self):
        """Each binary operator token yields the matching AST node type."""
        cases = [
            (TokenType.PLUS, AddNode),
            (TokenType.MINUS, SubtractNode),
            (TokenType.MULTIPLY, MultiplyNode),
            (TokenType.DIVIDE, DivideNode),
        ]

        for op_type, node_class in cases:
            tokens = [
                Token(TokenType.NUMBER, 27),
                Token(op_type),
                Token(TokenType.NUMBER, 14),
            ]
            node = Parser(tokens).parse()
            self.assertEqual(node, node_class(NumberNode(27), NumberNode(14)))
def ingest_data_and_respond(data, log):
    """Validate POSTed consumption data and persist it for a spaceship.

    Args:
        data: dict with at least 'spaceship_id', 'units', and the
            consumption timeseries payload.
        log: logger used for progress and error reporting.

    Returns:
        A success message string, or an (error, HTTPStatus) tuple.
    """
    log.info('Got POSTed data: {}'.format(data))
    timeseries_df, err = Parser.validate_parse_consumption_data(data, log)
    if err != '':
        return err, HTTPStatus.BAD_REQUEST

    ship_id = data['spaceship_id']
    units = data['units'].lower()

    if units == 'kwh':
        timeseries, err = Parser.split(timeseries_df, log)
    elif units == 'kw':
        timeseries, err = Parser.convert_and_split(timeseries_df, log)
    else:
        # Fix: previously fell through with `timeseries`/`err` unbound,
        # raising NameError for any unexpected unit.
        return 'Unsupported units: {}'.format(units), HTTPStatus.BAD_REQUEST

    if err != '':
        return err, HTTPStatus.BAD_REQUEST

    log.debug('Saving dataframe: {} \nfor ship_id: : {}'.format(
        timeseries,
        ship_id,
    ))

    if not DBManager.save_energy_entry(ship_id, timeseries):
        log.error('db save failed for ship: {}'.format(ship_id))
        log.error('timeseries: {}'.format(timeseries))
        res = DBManager.get_full_energy_entry(ship_id)
        log.error('full ship info in db befre fail: {}'.format(res))
        return 'DB error', HTTPStatus.SERVICE_UNAVAILABLE

    return 'Data saved successfully for ship {}'.format(ship_id)
Example #16
0
 def __init__(self):
     """Create the parser and pinyin converter, then load the pinyin dict."""
     self.pa = Parser()
     self.pp = PinYin()
     self.pp.load_word()
     # Fix: dropped the explicit ff.close() that sat inside the with-block;
     # the context manager already closes the file on exit.
     dict_path = os.path.join(os.path.dirname(__file__), 'pinyin_dict')
     with open(dict_path, 'r') as ff:
         # The whole dictionary is serialized on the first line.
         self.jj_dict = json.loads(ff.readline())
Example #17
0
 def test_fremy_salt(self):
     """Fremy's salt parses into its per-element atom counts."""
     expected = {'K': 4, 'N': 2, 'O': 14, 'S': 4}
     self.assertEqual(Parser().parse_molecule(fremy_salt), expected)
Example #18
0
 def __init__(self, args):
     """Initialise the puzzle solver state on top of the base Parser setup."""
     Parser.__init__(self, args)
     self.heuristique = args.heur
     # NOTE(review): `self.size = self.size` is a no-op; presumably
     # Parser.__init__ already set self.size from args — confirm intent.
     self.size = self.size
     self.solvable = True
     # Empty start and goal boards plus the initial search state.
     self.start_map = np.zeros(self.size)
     self.final_map = np.zeros(self.size)
     self.curent_state = (np.zeros(self.size), self.size * self.size)
 def __init__(self, db_config, create_new_tables=False):
     """Open the Postgres connection; optionally rebuild all tables."""
     self._db = Postgres_db(db_config)
     self._parser = Parser()
     if create_new_tables is not True:
         return
     # Best-effort drop of any existing tables before recreating them.
     try:
         self._db.drop_existing_tables_from_db()
     except Exception as e:
         logging.exception(e)
     self._db.create_tables()
Example #20
0
 def __init__(self, config):
     """Create the logger, server connector, and wire-format parser."""
     self.logger = logging.getLogger()
     server = config.server
     self.connector = Connector(server.host,
                                server.port,
                                server.enablessl,
                                server.crtfile,
                                server.timeout,
                                )
     self.parser = Parser(config.main.serializer)
Example #21
0
def add():
    """Evaluate the posted expression, persist it, and redirect to index.

    Reads 'expression' from the submitted form, computes its value, and
    stores both (with a UTC timestamp) as a new Expression row.
    """
    # Fix: dropped the unused `text` alias (was `expression = text = ...`).
    expression = request.form['expression']
    value = Parser(expression).getValue()
    now = datetime.utcnow()

    db.session.add(Expression(text=expression, value=value, now=now))
    db.session.commit()

    return redirect(url_for('index'))
Example #22
0
def intermediate(code):
    """Compile source text into function info ready for code generation."""
    block = Parser(code).parse()
    # Wrap the parsed block in a vararg top-level function definition.
    fd = {'params': {'var': True, 'params': []}, 'block': block}
    info = new_func_info(None, fd)
    info.add_local_var('_ENV')
    cg_func_def_exp(info, fd, 0)
    return info
Example #23
0
def repl_core(input_file, output_file):
    """Read paragraphs from input_file, evaluate each, and emit results.

    Actions are executed against the scope; anything else is printed to
    output_file.
    """
    scope = Scope(create_built_in_scope())
    parser = Parser(file_token_stream(input_file))
    for paragraph in parser.iter_paragraph(scope):
        outcome = evaluate(paragraph, scope)
        if isinstance(outcome, Action):
            outcome.do(scope)
        else:
            print(outcome, file=output_file)
Example #24
0
def main(args):
    """Parse CLI options and start the HTTP file server.

    Args:
        args: argv-style list of option strings (-port, -address, -path).
    """
    parser = Parser(prog=utils.abs_path('./http_server.py'))

    parser.add_argument("-port",
                        default=gv.cdn_port(),
                        help="port will run: default 8000")
    parser.add_argument("-address",
                        default='',
                        help="address bind, default any")
    parser.add_argument("-path",
                        default=gv.cdn_path(),
                        help="path will run http, default :" +
                        utils.abs_path(gv.cdn_path()))

    arguments = parser.parse_args(args)
    run(arguments.port, arguments.address, arguments.path)
    # Fix: removed a large block of dead commented-out argparse/CGI code
    # and the stray trailing `pass`.
Example #25
0
def preview(id):
    """Render a stored Page record as a live preview.

    Injects a "last saved" banner into the page body; falls back to the
    empty-preview template when the record does not exist.
    """
    record = db_session.query(Page).get(id)
    if record is not None:
        parser = Parser()
        html = parser.parse(record.data)
        html = html.replace('</body>', '{{ post_body|safe }}</body>')

        last_saved = record.updated.strftime('%B %d, %Y at %I:%M%p')
        post_body = render_template('preview_post_body.html', last_saved=last_saved)
        # Fix: the original return had a trailing comma, producing a
        # one-element tuple, which Flask rejects as a response.
        return render_template_string(html, post_body=post_body)
    return render_template('empty_preview.html')
Example #26
0
def parse():
    """Read scraped raw data, parse every object, and save them as CSV."""
    logger.info("parse")
    storage = Persistor()
    parser = Parser()

    raw_data = storage.read_raw_data(SCRAPPED_FILE)
    # Split the raw dump into objects, then parse each one.
    objects = parser.process_rawdata(raw_data)
    parsed_files = [parser.parse_object(obj) for obj in objects]
    storage.save_csv(parsed_files, TABLE_FORMAT_FILE)
Example #27
0
def main():
    """Run a simple read-eval-print loop on stdin."""
    while True:
        parser = Parser(Lexer(text=input('>>> ')))
        print(parser.read())
Example #28
0
    def test_parser(self):
        """Infix expressions convert to the expected postfix token lists."""
        expected = {
            '(1+2)<>3': ['1', '2', '+', '3', '<>'],
            '1+2*3': ['1', '2', '3', '*', '+'],
            '1 >= 2': ['1', '2', '>='],
            '(1*2+9) = (2*3-6)':
                ['1', '2', '*', '9', '+', '2', '3', '*', '6', '-', '='],
        }

        parser = Parser()

        for expr, correct in expected.items():
            self.assertEqual(parser.parse(expr), correct)
Example #29
0
def run(s):
    """Scan, parse, and interpret source string *s*, unless errors occurred."""
    tokens = Scanner(s, error).scan_tokens()
    statements = Parser(tokens, parse_error).parse()
    # Bail out before interpretation on any scan/parse or runtime error flag.
    if hasError or hasRuntimeError:
        return
    Interpreter().interpret(statements)
Example #30
0
def load_basic_tools(fname="basic.gl"):
    """Parse the basic tool file and return its tools plus movable tools."""
    parser = Parser()
    parser.parse_file(fname)
    tool_dict = parser.tool_dict
    basic_tools = ImportedTools(tool_dict)
    # These primitives are symmetric in their first two arguments.
    for tool in (basic_tools.line, basic_tools.dist,
                 basic_tools.intersection_ll, basic_tools.midpoint):
        tool.add_symmetry((1, 0))
    add_movable_tools(tool_dict, basic_tools)
    # Re-wrap so the result includes the movable tools just added.
    return ImportedTools(tool_dict)
Example #31
0
def getReply(message):
    """Translate an incoming message and return the best answer string."""
    parse_tree = Parser().parse(message)

    translation = []
    find_best_translation(parse_tree, translation)
    print(message)
    print(translation)

    # Join the chosen words into the formulated answer.
    return " ".join(translation)
Example #32
0
def test_error_location(names, devices, network, monitors, capsys):
    """Error detection should report the offending line in its output."""
    scanner = Scanner(error_location, names)
    Parser(names, devices, network, monitors, scanner).parse_network()
    captured = capsys.readouterr()

    expected = "line 10"
    try:
        assert expected in captured.out
    except AttributeError:
        # Older pytest returns a plain (out, err) tuple instead.
        assert expected in captured[0]
Example #33
0
def ey_parse_airfield(icao):
    """Parse a Lithuanian (EY) AIP aerodrome PDF.

    Extracts the airfield name, ARP position, and elevation from page 0,
    then collects ATS COM frequencies (skipping the emergency channels
    121.500/243.000) from every page.  Python 2 code (print statement,
    xrange, ur-literals).  NOTE(review): no return statement is visible
    in this chunk — confirm how the collected values are consumed.
    """
    spaces=[]                           
    p=Parser("/EY_AD_2_%s_en.pdf"%(icao,),lambda x:x)
    freqs=[]
    for nr in xrange(0,p.get_num_pages()): 
        page=p.parse_page_to_items(nr)
        if nr==0:
            # The header line looks like "EYxx - <name>"; pull the name.
            #[–-]
            nameregex=ur"\s*%s\s*[–-]\s*(.*?)\s*$"%(icao,)
            print "Nameregex",nameregex            
            nameitem=page.get_by_regex(nameregex,re.UNICODE)[0]            
            name,=re.match(nameregex,nameitem.text,re.UNICODE).groups()
            name=name.replace("Tarptautinis","International")
            #print repr(name)
            #sys.exit(1)
            # ARP coordinates sit just right of the Lithuanian heading.
            coordhdg,=page.get_by_regex(ur".*ARP\s*koordinat.s.*",re.DOTALL)
            coord=page.get_partially_in_rect(
                            coordhdg.x2+4,coordhdg.y1+0.1,100,coordhdg.y2-0.1)[0]
            pos,=mapper.parsecoords(fixup(coord.text.replace(" ","")))
            
            # Elevation in feet, right of the "Vietos aukštis" heading.
            elevhdg,=page.get_by_regex(ur".*Vietos\s*aukštis.*",re.DOTALL)
            elevitem,=page.get_partially_in_rect(
                            elevhdg.x2+1,elevhdg.y1+0.1,100,elevhdg.y2-0.1)
            elev,=re.match(ur"(\d+)\s*FT.*",elevitem.text).groups()
            elev=int(elev)
                        
    
        # Frequencies live between the COM heading and the nav heading.
        for comm in page.get_by_regex(ur".*ATS\s*COMMUNICATION\s*FACILITIES.*",re.DOTALL):
            ends=page.get_by_regex_in_rect(
                        ur".*RADIO\s*NAVIGATION.*",
                        0,comm.y2,100,100)
            if ends:
                end=ends[0].y1-0.1
            else:
                end=100
            freqitems=page.get_by_regex_in_rect(
                        ur".*\d{3}\.\d{3}.*",
                    0,comm.y2,100,end-0.1)
            # The service name column may span several frequency rows;
            # remember the last seen service label.
            lastservice=None
            for freq in freqitems:
                service=page.get_partially_in_rect(
                    0,freq.y1+0.1,17,freq.y2-0.1)
                if service:
                    lastservice=service[0]
                print lastservice
                assert len(spaces)==0
                for freqstr in re.findall(ur"\d{3}\.\d{3}",freq.text):
                    if freqstr!="121.500" and freqstr!="243.000":
                        freqs.append((lastservice.text.split("/")[0],float(freqstr)))
Example #34
0
    def messageParcer(self):
       """Consume raw log messages from the queue and feed parsed event
       logs to the processing algorithm until self.running is cleared.
       Runs as a worker-thread loop; Python 2 code (`thread` module).
       """
       logging.debug("messageParcer in thread " + str(thread.get_ident()))
       parser = None
       # get parsing patterns from config file when in testing mode
       if self.testEnabled:
         parser = Parser(self.successPattern, self.failurePattern, self.testEnabled)
       else:
         parser = Parser()

       while self.running:
            # Blocks until a message is available.
            msg = queue.get()
            eventLog = parser.parseLogLine(msg)
            if eventLog:
                algorithm.processEventLog(eventLog)
                logging.debug("messages in queue " + str(queue.qsize()) + ", received %r from %s:%d" % (msg.data, msg.host, msg.port))
Example #35
0
 def __init__(self, config):
     """Connect to the configured server and set up the wire-format parser."""
     server = config.server
     connector = Connector(server.host,
                           server.port,
                           # server.keyfile intentionally not passed here
                           server.crtfile,
                           server.timeout,
                           )
     self.socket = connector.connect()
     self.parser = Parser(config.main.serializer)
Example #36
0
class ClientEndpoint(object):
    """TCP/SSL client endpoint: encodes requests with the configured
    serializer, sends them to the server, and decodes the responses.
    Python 2 code (old `except E, e` syntax, print statements).
    """

    # Both populated by __init__.
    socket = None
    parser = None

    def __init__(self, config):
        # NOTE(review): the keyfile argument is commented out, so
        # crtfile/timeout shift up one position — confirm this matches
        # Connector's signature.
        connector = Connector(config.server.host,
                              config.server.port,
                              #config.server.keyfile,
                              config.server.crtfile,
                              config.server.timeout,
                              )
        self.socket = connector.connect()
        self.parser = Parser(config.main.serializer)


    def request(self, data):
        # Encode, send, and decode one request/response round trip.
        pack = self.parser.encode(data)

        self.socket.sendall(pack)
        #self.socket.write(pack)
        response = self.socket.read(1024)
        return self.parser.decode(response)
        #return self.parser.decode(self.socket.read(1024))

        #return self.parser.decode(self._recv())

    def _recv(self, n=1):
        # Best-effort read of exactly n bytes; stops early on EOF or
        # after a failed SSL read that leaves `chunk` empty.
        data = ""
        chunk = ""
        while len(data) < n:
            try:
                chunk = self.socket.recv(n - len(data))
                #chunk = self.socket.read(n - len(data))
            except Exception, e:
                print "SSL read failed: %s" % str(e)

            if len(chunk) == 0:
                break
            data += chunk
        print "\033[33mdata: %s\033[0m" % str(data)
        return data
Example #37
0
def extract_single_sup(full_url,sup,supname,opening_ours):
    """Extract areas (and hours-of-operation markers) from one AIP SUP PDF.

    Adds the SUP's URL to `opening_ours` when an HOURS OF OPERATION
    heading is found, and collects any areas located on each page.
    Returns [] when the PDF cannot be parsed.  Python 2 code.
    NOTE(review): `areas` is built but no return of it is visible in this
    chunk, and `found` is set but never used — confirm against the full
    file.
    """
    #print getxml("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    ads=[]
    try:
        p=Parser(sup)
    except Exception:
        print "Could't parse",sup
        #Some AIP SUP's contain invalid XML after conversion from PDF.
        #skip these for now
        return []
    areas=[]
    startpage=None
    for pagenr in xrange(p.get_num_pages()):            
        page=p.parse_page_to_items(pagenr)
        #print page.get_all_items()
        for item in page.get_by_regex(".*HOURS OF OPERATION.*"):
            # Look at the lines around the heading for a SUP reference.
            lines=page.get_lines(page.get_partially_in_rect(0,item.y1-2,100,item.y2+2))
            found=False
            for line in lines:
                if re.match(ur".*SUP\s*\d+/\d{4}\.?\s+HOURS OF OPERATION\s*$",line):
                    opening_ours.add(p.get_url())
                    print "Found hours:",opening_ours

        # Collect any chart areas found on this page; area extraction is
        # best-effort, so failures are swallowed.
        try:
            for areaname,coords,meta in find_areas(page):
                if areaname:
                    name="%s (on page %d of %s)"%(areaname,pagenr+1,supname)
                else:
                    name="Area on page %d of %s"%(pagenr+1,supname)
                    
                    
                print "Number of points",len(coords)
                areas.append(dict(
                        url=full_url,
                        pagenr=pagenr+1,
                        sup=supname,
                        name=name,
                        type='aip_sup',
                        points=coords))
        except Exception:
            pass
Example #38
0
def main(fileObj):
    """Tokenize, parse, and evaluate the program read from fileObj, then
    call the resulting value with no arguments and print its string form
    (or None), followed by a separator line.  Python 2 code.
    """
    global_env = make_builtins()
    tokenizer = Tokenizer(PeekableStream(fileObj), False)
    tokenList = tokenizer.read_all()
    tokenString = tokenizer.as_string()
    # print tokenString

    parser = Parser(tokenList)
    code = parser.read_all()
    codeString = parser.as_string()
    # print codeString
        
    # Evaluate in a child environment of the builtins, then invoke the
    # resulting value as a zero-argument callable.
    env = Dict(global_env)
    val = code.evaluate(env)
    result = val.call(None, List([]), env)

    if result is not None:
        print result.as_string("")
    else:
        print None
    print "-"*50
Example #39
0
def compile(p, debug=False):
    """Compile pattern `p` into a single NFA.

    Lexes and parses the pattern into postfix tokens, then feeds each
    token to its handler; exactly one NFA must remain on the stack.
    """
    tokens = Parser(Lexer(p)).parse()

    handler = Handler()

    if debug:
        # Dump the token stream before building the NFA.
        for t in tokens:
            print(t)

    nfa_stack = []
    for t in tokens:
        handler.handlers[t.name](t, nfa_stack)

    assert len(nfa_stack) == 1
    return nfa_stack.pop()
Example #40
0
class Pipeline():
    """Scrape podcast feeds, parse them, and persist the results."""

    def __init__(self):
        session = requests.Session()
        session.headers = {'user-agent': 'shr-podcasts-bot'}
        self.session = session
        self.scraper = Scraper(session)
        self.parser = Parser(session)
        self.storage = Storage()

    def run(self, root, start_page):
        """Walk feeds from root/start_page and store every parsed podcast."""
        feeds = self.scraper.scrape(root, start_page)
        parsed = (self.parser.parse_feed(feed) for feed in feeds)
        # Skip feeds that failed to parse (falsy results).
        for podcast in filter(None, parsed):
            self.storage.store_podcast(podcast)
Example #41
0
def parse_sig_points():
    """Parse Swedish AIP ENR 4.4 / ENR 4.1 PDFs into significant points.

    Returns a list of dicts: plain significant points (name/kind/pos)
    from ENR 4.4, plus radio nav-aids (name/short/kind/pos/freq) from
    ENR 4.1.  Python 2 code (print statement, xrange).
    """
    p=Parser("/AIP/ENR/ENR 4/ES_ENR_4_4_en.pdf")
    points=[]
    for pagenr in xrange(p.get_num_pages()):
        #print "Processing page %d"%(pagenr,)
        page=p.parse_page_to_items(pagenr)
        lines=page.get_lines(page.get_all_items(),order_fudge=20)
        for line in lines:
            cols=line.split()
            if len(cols)>2:
                # Columns 1-2 hold the lat/lon pair when this is a point row.
                coordstr=" ".join(cols[1:3])
                #print cols
                if len(mapper.parsecoords(coordstr))>0:
                    crd=mapper.parsecoord(coordstr)
                    #print "Found %s: %s"%(cols[0],crd)
                    points.append(dict(
                        name=cols[0],
                        kind='sig. point',
                        pos=crd))

    # Second document: radio navigation aids, laid out as a table whose
    # columns are located via their headings.
    p=Parser("/AIP/ENR/ENR 4/ES_ENR_4_1_en.pdf")
    for pagenr in xrange(p.get_num_pages()):
        page=p.parse_page_to_items(pagenr)
        nameheading,=page.get_by_regex(r".*Name of station.*")
        freqheading,=page.get_by_regex(r".*Frequency.*")
        coordheading,=page.get_by_regex(r".*Coordinates.*")
        items=sorted(list(x for x in page.get_partially_in_rect(nameheading.x1,nameheading.y2+2,nameheading.x1+1,100) if x.text.strip()),key=lambda x:x.y1)
        idx=0
        # Items alternate: station name row, then kind (VOR/DME/NDB) row.
        while True:
            if items[idx].text.strip()=="":
                idx+=1
                continue
            if idx+1>=len(items):
                break
            name=items[idx]
            kind=items[idx+1]
            diffy=kind.y1-name.y2
            #print "Name, kind:",name,kind
            #print name.text,kind.text,diffy
            assert kind.text.count("VOR") or kind.text.count("DME") or kind.text.count("NDB")
            assert diffy<0.5
            #print "Frq cnt: <%s>"%(page.get_partially_in_rect(freqheading.x1,name.y1+0.05,freqheading.x2,kind.y2-0.05),)
            freqraw=" ".join(page.get_lines(page.get_partially_in_rect(freqheading.x1,name.y1+0.05,freqheading.x2,kind.y2-0.05)))
            short,freq=re.match(r"\s*([A-Z]{2,3})?\s*(\d+(?:\.?\d+)\s+(?:MHz|kHz))\s*(?:H24)?\s*",freqraw).groups()
            
            posraw=" ".join(page.get_lines(page.get_partially_in_rect(coordheading.x1,name.y1+0.05,coordheading.x2,kind.y2-0.05)))
            #print "Rawpos<%s>"%(posraw,)
            pos=mapper.parse_coords(*re.match(r".*?(\d+\.\d+[NS]).*?(\d+\.\d+[EW]).*",posraw).groups())
            #print "Name: %s, Shortname: %s, Freq: %s,pos: %s"%(name.text,short,freq,pos)
            points.append(dict(
                name=short+" "+kind.text.strip()+" "+name.text.strip(),
                short=short,
                kind="nav-aid",
                pos=pos,
                freq=freq))
            idx+=2        
    
    
    return points
Example #42
0
def publish(id):
    """Render a stored Page to static HTML and record its publish time.

    Looks up the Page by primary key, converts its raw data to HTML and
    writes an index.html under the public or limited output directory,
    depending on the record's access level.  Private pages are never
    written to disk.  Returns a JSON response describing the result, or
    an error payload when no record matches ``id``.
    """
    record = db_session.query(Page).get(id)
    if record is None:
        return jsonify(error=True, message="Not found")

    parser = Parser()
    html = parser.parse(record.data)

    # Prefer the human-readable slug; fall back to the opaque key.
    dir_name = record.key if record.slug is None else record.slug

    if record.access == 'public':
        url = dir_name
        file_path = '%s%s' % (config['generate']['public_path'], dir_name)
    elif record.access == 'limited':
        # Limited pages are only reachable with the secret key appended.
        url = '%s?key=%s' % (dir_name, record.key)
        file_path = '%s%s' % (config['generate']['limited_path'], dir_name)
    else:
        # access is private so do nothing
        return jsonify(published=record.published.strftime('%B %d, %Y at %I:%M%p'))

    if not os.path.exists(file_path):
        os.makedirs(file_path)

    full_path = '%s/index.html' % file_path
    with open(full_path, "w") as fh:
        fh.write(html)

    # Stamp and persist the publish time.
    record.published = datetime.now()
    db_session.add(record)
    db_session.commit()
    return jsonify(
        published_date=record.published.strftime('%B %d, %Y at %I:%M%p'),
        url=url
    )
Example #43
0
    def __init__(self):
        """Load the inverted index, the "urllist" URL metadata file and
        the precomputed "pagerank" scores from the working directory.

        NOTE(review): this body is duplicated verbatim from the
        ``searcher`` class defined elsewhere in this file.
        """
        self.__invertedindex = IndexBuilder().index
        self.pp = Parser()
        # Warm-up call to the tokenizer -- presumably primes internal
        # state; confirm against Parser.normalize's implementation.
        self.pp.normalize("a")
        self.pagerank = []
        with open("urllist", "r") as f1:  # open the urllist file
            self.__num1 = int(f1.readline())  # total number of urls
            self.urllist = []
            n = 0

            while n < self.__num1:  # read one metadata record per url
                s = f1.readline()
                arr = s.split(" ")
                # urlid = int(arr[0])          #url ID
                url = arr[1]  # url address
                indegree = int(arr[2])  # in-degree: used for PageRank
                outdegree = int(arr[3])  # out-degree
                length_of_texts = int(arr[4])
                self.urllist.append([url, indegree, outdegree, length_of_texts])
                n = n + 1
        with open("pagerank", "r") as file:
            # One PageRank score per line, in url-id order.
            for line in file:
                self.pagerank.append(float(line))
Example #44
0
                    data = self.data["testing"]["data"][i].T
                    labels = self.data["testing"]["labels"][i].T 
                else:
                    data = np.append(data, self.data["testing"]["data"][i].T, axis=0)
                    labels = np.append(labels, self.data["testing"]["labels"][i].T, axis=0)
        return data, labels

    def testing(self, c=20):
        k = c/(100/self.k)
        data, labels = None, None
        for i in range(self.k-1, self.k-k-1, -1):
            if data is None and labels is None:
                data = self.data["testing"]["data"][i].T
                labels = self.data["testing"]["labels"][i].T 
            else:
                data = np.append(data, self.data["testing"]["data"][i].T, axis=0)
                labels = np.append(labels, self.data["testing"]["labels"][i].T, axis=0)
        return data, labels


if __name__=="__main__":
    # Load the UCI "vote" dataset via the project's Parser and exercise
    # the UCIFolder train/test splitter at several training percentages.
    vote_parser = Parser('../data/vote/vote.config', '../data/vote/vote.data')
    vote_parser.parse_config()
    D = vote_parser.parse_data()
    print D.shape
    ucifolder = UCIFolder(D, normalize=False, shuffle=False)
    for c in [5,10,15,20,30,50,80]:
        # c is the percentage of folds used for training; testing() is
        # always called with its default split size here.
        train_data, train_labels = ucifolder.training(c)
        test_data, test_labels = ucifolder.testing()
        print train_data.shape, test_data.shape
0
                        if i<length-1:
                            attach[position[i+1]]+=1
                            i+=1
                        else:
                            notend=False
        else:
            cpl.append(phrase_list)
        return cpl


if __name__ == '__main__':
    cc=correction()
    word=raw_input('输入纠正词: ').decode('utf-8')
    choose=input('输入选择:1.普通查找 2.精确查找\n')
    if choose==1:
        pp=Parser()
        phrase_list=pp.normalize(word)
    else:
        if choose==2:
            i=0
            phrase_list=[]
            while i < len(word):
                phrase_list.append(word[i])
                i+=1
    '''for ii in phrase_list:
        print(ii.encode('utf-8'))'''
    print('________________before correct________________')
    ll=cc.correct(phrase_list)
    if len(ll)==0:
        print('no correct')
    else:
Example #46
0
 def __init__(self):
     """Set up a shared HTTP session and the scraper/parser/storage helpers."""
     session = requests.Session()
     session.headers = {'user-agent': 'shr-podcasts-bot'}
     self.session = session
     # Scraper and Parser reuse the same session (shared connection pool).
     self.scraper = Scraper(session)
     self.parser = Parser(session)
     self.storage = Storage()
Example #47
0
class searcher:
    # IndexBuilder().index
    # ... = Parser()
    # ....normalize(str) #['word','word'...]
    # 定义构造方法
    def __init__(self):
        self.__invertedindex = IndexBuilder().index
        self.pp = Parser()
        self.pp.normalize("a")
        self.pagerank = []
        with open("urllist", "r") as f1:  # 打开文件urllist
            self.__num1 = int(f1.readline())  # 总url数目
            self.urllist = []
            n = 0

            while n < self.__num1:  # 将url信息存入字典中
                s = f1.readline()
                arr = s.split(" ")
                # urlid = int(arr[0])          #url ID
                url = arr[1]  # url地址
                indegree = int(arr[2])  # url入度:用于计算PageRank
                outdegree = int(arr[3])  # url出度
                length_of_texts = int(arr[4])
                self.urllist.append([url, indegree, outdegree, length_of_texts])
                n = n + 1
        with open("pagerank", "r") as file:
            for line in file:
                self.pagerank.append(float(line))

    def search_cos(self, query, pagerank=True):
        querydict_tf = {}
        weight = {}
        scoredict = {}
        length = 0
        heap = []
        urlids = []
        self.querylist = self.pp.normalize(query)
        totaldoc = len(self.urllist)
        for item in self.querylist:
            if item in querydict_tf:
                querydict_tf[item] += 1
            else:
                querydict_tf[item] = 1
        for item in querydict_tf.iterkeys():
            if item in self.__invertedindex:
                weight[item] = (1.0 + math.log10(querydict_tf[item])) * math.log10(
                    1.0 * totaldoc / self.__invertedindex[item][0]
                )
            else:
                weight[item] = 0

        i = 0
        while i < self.__num1:
            score = 0

            for item in weight.iterkeys():
                if item in self.__invertedindex and str(i) in self.__invertedindex[item][1]:
                    score += weight[item] * self.__invertedindex[item][1][str(i)][1]
            if pagerank:
                score *= self.pagerank[i]
            uid = id_score(i, score)
            if uid.score > 0:
                if len(heap) <= 50:
                    heapq.heappush(heap, uid)
                else:
                    heapq.heappushpop(heap, uid)

            i += 1

        # 输出
        while len(heap) > 0:
            tmp = heapq.heappop(heap).urlid
            urlids.append(tmp)
        urlids.reverse()
        return urlids

    # boolean search
    def boolean(self, query):
        query = self.pp.normalize(query)  # 解析query
        # character = []
        # for term in query:
        #     print type(term)
        #     query.append(term)
        character_set = list(set(query))  # 去重

        # 根据term的倒排索引数目排序
        # character_set = []
        # for term in character:
        #     T = (term, len(self.__invertedindex[term][1]))
        #     character_set.append(T)
        # character_set.sort(lambda x, y: cmp(x[1], y[1]))

        # 获取倒排文件索引
        finalindex = self.__invertedindex.get(character_set[0], [0, {}, 0])[1].keys()  # 获得第一个term的倒排文件索引
        for term in character_set:
            if finalindex:
                index = self.__invertedindex.get(term, [0, {}, 0])[1].keys()  # 获得第i个term的倒排文件索引
                finalindex = list(set(finalindex) & set(index))
            else:
                return finalindex

        heap = []
        for url in finalindex:
            score = 0
            for term in character_set:
                score = score + self.__invertedindex.get(term, [0, {}, 0])[1][url][0]
            heap.append(id_score(int(url), score))
        heapq.heapify(heap)

        urlids = []
        while len(heap) > 0:
            tmp = heapq.heappop(heap).urlid
            urlids.append(tmp)
        urlids.reverse()
        return urlids

    def gettitle(url):
        try:
            req_header = {
                "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6"
            }
            req = urllib2.Request(url, None, req_header)
            page = urllib2.urlopen(req, None, 54)
            html = page.read()
            page.close()
            soup = BeautifulSoup(html)
            title = soup.title
            title = title.string
        except Exception as e:
            print e
            title = None
        return title
Example #48
0
# Token definitions for the expression language.  ``Tok`` builds a lexer
# token from a regex; the overloaded ``<`` attaches a semantic action
# (here, AST node constructors) to the token on its left.
flt   = Tok(r'[\+\-]?(\d+\.)|(\d*\.\d+)') < Float
int_  = Tok(r'[\+\-]?\d+')                < Int
nam   = Tok(r'\w+')                       < Name
var   = nam
plus  = Tok(r'\+')
dash  = Tok(r'\-')
star  = Tok(r'\*')
slash = Tok(r'\/')
mod   = Tok(r'\%')
dstar = Tok(r'\*\*')
opar  = Tok(r'\(')
cpar  = Tok(r'\)')
equal = Tok(r'\=')
kw_print = Tok(r'print')

# Forward-declared grammar rules; each Parser's ``.parser`` attribute is
# assigned below, which allows the rules to refer to each other
# recursively before they are defined.
exprs = Parser()
exprl = Parser()
expr  = Parser()
prim  = Parser()
expo  = Parser()
sign  = Parser()
fact  = Parser()
summ  = Parser()
asgn  = Parser()
prin  = Parser()

# A primary is a literal, a variable, or a parenthesised expression.
prim.parser = flt | int_ | var | opar + expr - cpar
# Exponentiation, right-associative via the recursive ``expo`` operand.
expo.parser = (
    (prim - dstar & expo < Pow) |
    prim
)
Example #49
0
 def call(self, vm, arg):
     """Parse ``arg`` as one line of source and execute it on ``vm``.

     The import is done lazily inside the method -- presumably to avoid
     a circular import with the parse module; confirm against the
     module layout.
     """
     from parse import Parser, ParseError
     return Parser.parse_line(vm, arg)
 def test_parse_user_ids(self):
     """The orders fixture should yield user ids [7, 8, 9, 1], in order."""
     parser = Parser()
     parser.fft_orders = self.order_hash
     self.assertEqual([7,8,9,1], parser.user_ids_from_orders(parser.fft_orders))
Example #51
0
# Token definitions for the calculator grammar.  ``Tok`` builds a token
# from a regex; the overloaded ``<`` attaches a semantic action that is
# run on the matched text.
end = Tok(r'$') < (lambda _: None)
num = Tok(r'[\+\-]?(\d+\.?)|(\d*\.\d+)') < float
nam = Tok(r'\w+')
var = nam < (lambda x : env[x])  # variables are resolved via the global env
plus  = Tok(r'\+')
dash  = Tok(r'\-')
star  = Tok(r'\*')
slash = Tok(r'\/')
mod   = Tok(r'\%')
dstar = Tok(r'\*\*')
opar  = Tok(r'\(')
cpar  = Tok(r'\)')
equal = Tok(r'\=')
rr    = Tok(r'\>\>')

# Forward-declared grammar rules; each Parser's ``.parser`` attribute is
# filled in below, allowing mutual recursion between the rules.
exprs = Parser()
expr = Parser()
prim = Parser()
expo = Parser()
sign = Parser()
fact = Parser()
summ = Parser()
asgn = Parser()
prin = Parser()

# A primary is a number, a variable, or a parenthesised expression.
prim.parser = num | var | opar + expr - cpar
# Exponentiation, right-associative and evaluated immediately.
expo.parser = (
    (prim - dstar & expo < (lambda a, b: a ** b)) |
    prim
)
sign.parser = (
Example #52
0
File: secd.py Project: ganow/secd
            code = parser.run(buf)
            secd.c = [code]
            secd.update()

            secd.clear()
            buf = []
            i += 1
            print
            print 'secd %d :> ' % i,
        else:
            buf.append(line)
            i += 1
            print
            print 'secd %d :> ' % i,


if __name__ == '__main__':

    # Build the SECD virtual machine and the source-code parser.
    secd = SECDMachine()
    parser = Parser()

    if len(sys.argv) == 2:
        # Script mode: compile the given file and run it to completion.
        print 'run script:', sys.argv[1]
        code = parser.run_from_file(sys.argv[1])
        secd.c = [code]
        print secd
        secd.update()

    else:
        # No script argument: fall back to the interactive REPL.
        print 'dialog mode'
        dialog(secd, parser)
Example #53
0

if __name__ == "__main__":
    import getopt, sys
    # NOTE(review): only the short option "s" is declared here, so the
    # '--stemming' long form tested below can never be produced by
    # getopt as written -- confirm whether long options were intended.
    opts, args = getopt.getopt(sys.argv[1:], "s")

    use_stemming = False
    for o, a in opts:
        if o in ('-s','--stemming'):
            use_stemming = True

    print 'use stemming: %d' % use_stemming

    # Index every file in the corpus directory.
    filelist = [(path + f) for f in os.listdir(path)]

    parser = Parser(fstopname)
    # Run the pipeline with/without idf weighting (stemming is fixed to
    # the command-line choice; the single-element list keeps loop shape).
    for stem in [use_stemming,]:
        for idf in True, False:
            print 'Parsing files...',
            stdout.flush()
            parser.parse(filelist[:10000], stem)
            # Ignore the 30% least and most frequent words
            parser.words = slice_sorted_words(parser.words, 30)
            print 'done'

            print 'Normalizing frequencies...',
            stdout.flush()
            # Don't modify the original set
            for i, doc in enumerate(parser.docset):
                normalize(doc, parser.words, idf)
                print i
Example #54
0
 def __init__(self, config_file, data_file):
     """Parse the vote config and data files; keep the data matrix in ``self.D``."""
     parser = Parser(config_file, data_file)
     parser.parse_config()
     self.D = parser.parse_data()
Example #55
0
def test_evaluator():
    """Drive the tokenizer, parser and evaluator over a battery of small
    programs and print the tokens, parse tree and result of each.

    The programs live in one big string separated by "# NEXT" markers;
    each fragment runs in a fresh environment layered over the shared
    builtins so cases cannot leak state into one another.
    """
    from parse import Parser
    from lex import Tokenizer, mockfile

    from function import make_builtins

    global_env = make_builtins()

    test_list = """

# NEXT 4;
# NEXT def x = 4;
# NEXT def x = 4; x;
# NEXT def x = ["a", "b"]; x;
# NEXT def x = ["a", "b"]; x[0];
# NEXT ["a", "b"][0];
# NEXT {4;}[];
# NEXT ![x]{x;};
# NEXT ![x]{x;}[5];
# NEXT 
def x = 6;
def ret4 = ![x]{
  set x = 4;
  x;
};
set x = 9;
ret4[x];
x;
# NEXT 
def a = 1;
def b = ();
def c = ( d=3);
# NEXT 4.str;
# NEXT 4.str[];
# NEXT 4.add[2];

# NEXT 

def add4 = {
  ![y]{4.add[y]}
};
add4[][1];

# NEXT 

def addN = ![n]{
  ![y]{n.add[y]}
};
var add2 = addN[2];

add2[9];

# NEXT 

true;

# NEXT 

true.ifTrue[{4},{5}];

# NEXT 

false.ifTrue[{4},{5}];


# NEXT File;
# NEXT File["../README.md"];
# NEXT 

def x = File["../README.md"]; 
x.read[1];
var y = x.read[5];
x.close[];
y;

# NEXT 

def x = ( y=4 );
def n = { inc x; y };
n[];

# NEXT 

def x = {
  def y = 4;
  Frame[];
};

x[];


# NEXT 


def Statement = {
  var kind;
  def str  = {Error["not implemented"]};
  def call = {Error["not implemented"]};
  def eval = {Error["not implemented"]};
  def get  = {Error["not implemented"]};
  Frame[];
}[];

Print["Statement = "];

Print[Statement];

def Var = {
  inc Statement;
  set kind = "var";
  var name;
  var chain;

  Frame[];
}[];

Print["Var = "];
Print[Var];

4;


""".split("# NEXT")

    print "-"*50
    for inputString in test_list:
        print inputString.strip()

        # Lex the fragment and show the token stream.
        tokenizer = Tokenizer(mockfile(inputString), False)
        tokenList = tokenizer.read_all()
        tokenString = tokenizer.as_string()
        print tokenString

        # Parse the tokens and show the resulting code object.
        parser = Parser(tokenList)
        code = parser.read_all()
        codeString = parser.as_string()
        print codeString

        # Evaluate in a fresh child environment and call the result.
        env = Dict(global_env)
        val = code.evaluate(env)
        result = val.call(None, List([]), env)

        if result is not None:
            print result.as_string("")
        else:
            print None
        print "-"*50
    tab = 0
    for i in range(len(para)):
        sys.stdout.write(para[i])
        if para[i] == "(":
            tab += 1
        elif para[i] == ")":
            tab -= 1
            if (i+1 < len(para) and para[i+1] != ')'):
                sys.stdout.write('\n' + tab*'\t')

if __name__ == '__main__':
    # Usage: prog <grammar-file> <sentence-file> [-t]
    # The original checked len(sys.argv) < 2 but reads sys.argv[2]
    # unconditionally below, so a single argument crashed with
    # IndexError; require both positional arguments.
    if (len(sys.argv) < 3):
        print "Error"
    else:
        file = sys.argv[1]
        sentenceFile = sys.argv[2]
        # Build the grammar, then parse every sentence in the input file.
        gram = parseGrammar(file)
        p = Parser(gram)
        p.parseSentenceFile(sentenceFile, gram)

        if ("-t" in sys.argv):
            # Pretty-print the parse rooted at ROOT.  (The original line
            # was tab-indented, a TabError on Python 3; now spaces.)
            output = prettyPrint('ROOT', "", gram)
            print output
#formatPretty(output)

        #prettyprint here



Example #57
0
def extract_airfields(filtericao=lambda x: True, purge=True):
    # print getxml("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    ads = []
    p = Parser("/AIP/AD/AD 1/ES_AD_1_1_en.pdf")
    points = dict()
    startpage = None
    for pagenr in xrange(p.get_num_pages()):
        page = p.parse_page_to_items(pagenr)
        if page.count("Aerodrome directory"):
            startpage = pagenr
            break
    if startpage == None:
        raise Exception("Couldn't find aerodrome directory in file")
    # print "Startpage: %d"%(startpage,)
    # nochartf=open("nochart.txt","w")
    for pagenr in xrange(startpage, p.get_num_pages()):
        row_y = []
        page = p.parse_page_to_items(pagenr)
        allines = [x for x in (page.get_lines(page.get_partially_in_rect(0, 0, 15, 100))) if x.strip()]
        for item, next in zip(allines, allines[1:] + [""]):
            # print "item:",item

            m = re.match(ur"^\s*[A-ZÅÄÖ]{3,}(?:/.*)?\b.*", item)
            if m:
                # print "Candidate, next is:",next
                if re.match(r"^\s*[A-Z]{4}\b.*", next):
                    # print "Matched:",item
                    # print "y1:",item.y1
                    row_y.append(item.y1)
        for y1, y2 in zip(row_y, row_y[1:] + [100.0]):
            # print "Extacting from y-range: %f-%f"%(y1,y2)
            items = list(page.get_partially_in_rect(0, y1 - 0.25, 5.0, y2 + 0.25, ysort=True))
            if len(items) >= 2:
                # print "Extract items",items
                ad = dict(name=unicode(items[0].text).strip(), icao=unicode(items[1].text).strip())
                # print "Icao:",ad['icao']
                assert re.match(r"[A-Z]{4}", ad["icao"])
                if not filtericao(ad):
                    continue
                if len(items) >= 3:
                    # print "Coord?:",items[2].text
                    m = re.match(r".*(\d{6}N)\s*(\d{7}E).*", items[2].text)
                    if m:
                        lat, lon = m.groups()
                        ad["pos"] = parse_coords(lat, lon)
                        # print "Items3:",items[3:]
                        elev = re.findall(r"(\d{1,5})\s*ft", " ".join(t.text for t in items[3:]))
                        # print "Elev:",elev
                        assert len(elev) == 1
                        ad["elev"] = int(elev[0])

                ads.append(ad)

    big_ad = set()
    for ad in ads:
        if not ad.has_key("pos"):
            big_ad.add(ad["icao"])

    for ad in ads:
        icao = ad["icao"]
        if icao in big_ad:
            if icao in ["ESIB", "ESNY", "ESCM", "ESPE"]:
                continue

            try:
                p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6_1_en.pdf" % (icao, icao))
            except:
                p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_6-1_en.pdf" % (icao, icao))

            ad["aipvacurl"] = p.get_url()
            for pagenr in xrange(p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)

                """
                for altline in exitlines:
                    m=re.match(r"(\w+)\s+(\d+N)\s*(\d+E.*)",altline)
                    if not m: continue
                    name,lat,lon=m.groups()
                    try:
                        coord=parse_coords(lat,lon)
                    except Exception:
                        continue
                    points.append(dict(name=name,pos=coord))
                """

                for kind in xrange(2):
                    if kind == 0:
                        hits = page.get_by_regex(r"H[Oo][Ll][Dd][Ii][Nn][Gg]")
                        kind = "holding point"
                    if kind == 1:
                        hits = page.get_by_regex(r"[Ee]ntry.*[Ee]xit.*point")
                        kind = "entry/exit point"
                    if len(hits) == 0:
                        continue
                    for holdingheading in hits:

                        items = sorted(
                            page.get_partially_in_rect(
                                holdingheading.x1 + 2.0, holdingheading.y2 + 0.1, holdingheading.x1 + 0.5, 100
                            ),
                            key=lambda x: x.y1,
                        )
                        items = [x for x in items if not x.text.startswith(" ")]
                        # print "Holding items:",items
                        for idx, item in enumerate(items):
                            print "Holding item", item
                            y1 = item.y1
                            if idx == len(items) - 1:
                                y2 = 100
                            else:
                                y2 = items[idx + 1].y1
                            items2 = [
                                x
                                for x in page.get_partially_in_rect(item.x1 + 1, y1 + 0.3, item.x1 + 40, y2 - 0.1)
                                if x.x1 >= item.x1 - 0.25 and x.y1 >= y1 - 0.05 and x.y1 < y2 - 0.05
                            ]
                            s = (" ".join(page.get_lines(items2))).strip()
                            print "Holding lines:", repr(page.get_lines(items2))
                            # if s.startswith("ft Left/3"): #Special case for ESOK
                            #    s,=re.match("ft Left/3.*?([A-Z]{4,}.*)",s).groups()
                            # m=re.match("ft Left/\d+.*?([A-Z]{4,}.*)",s)
                            # if m:
                            #    s,=m.groups()

                            if s.startswith("LjUNG"):  # Really strange problem with ESCF
                                s = s[0] + "J" + s[2:]
                            if s.lower().startswith("holding"):
                                sl = s.split(" ", 1)
                                if len(sl) > 1:
                                    s = sl[1]
                            s = s.strip()
                            if kind == "entry/exit point" and s.startswith("HOLDING"):
                                continue  # reached HOLDING-part of VAC

                            # Check for other headings
                            # Fixup strange formatting of points in some holding items: (whitespace between coord and 'E')
                            s = re.sub(ur"(\d+)\s*(N)\s*(\d+)\s*(E)", lambda x: "".join(x.groups()), s)

                            m = re.match(r"([A-Z]{2,}).*?(\d+N)\s*(\d+E).*", s)
                            if not m:
                                m = re.match(r".*?(\d+N)\s*(\d+E).*", s)
                                if not m:
                                    continue
                                assert m
                                lat, lon = m.groups()
                                # skavsta
                                if icao == "ESKN":
                                    if s.startswith(u"Hold north of T"):
                                        name = "NORTH"
                                    elif s.startswith(u"Hold south of B"):
                                        name = "SOUTH"
                                    else:
                                        assert 0
                                # add more specials here
                                else:
                                    continue
                            else:
                                name, lat, lon = m.groups()
                            try:
                                coord = parse_coords(lat, lon)
                            except Exception:
                                print "Couldn't parse:", lat, lon
                                continue
                            # print name,lat,lon,mapper.format_lfv(*mapper.from_str(coord))

                            if name.count("REMARK") or len(name) <= 2:
                                print "Suspicious name: ", name
                                # sys.exit(1)
                                continue
                            points[icao + " " + name] = dict(name=icao + " " + name, icao=icao, pos=coord, kind=kind)

    # for point in points.items():
    #    print point

    # sys.exit(1)

    def fixhex11(s):
        out = []
        for c in s:
            i = ord(c)
            if i >= 0x20:
                out.append(c)
                continue
            if i in [0x9, 0xA, 0xD]:
                out.append(c)
                continue
            out.append(" ")

        return "".join(out)

    for ad in ads:
        icao = ad["icao"]
        if icao in big_ad:
            # print "Parsing ",icao
            p = Parser("/AIP/AD/AD 2/%s/ES_AD_2_%s_en.pdf" % (icao, icao), loadhook=fixhex11)
            ad["aiptexturl"] = p.get_url()
            firstpage = p.parse_page_to_items(0)
            te = "\n".join(firstpage.get_all_lines())
            # print te
            coords = re.findall(r"ARP.*(\d{6}N)\s*(\d{7}E)", te)
            if len(coords) > 1:
                raise Exception(
                    "First page of airport info (%s) does not contain exactly ONE set of coordinates" % (icao,)
                )
            if len(coords) == 0:
                print "Couldn't find coords for ", icao
            # print "Coords:",coords
            ad["pos"] = parse_coords(*coords[0])

            elev = re.findall(r"Elevation.*?(\d{1,5})\s*ft", te, re.DOTALL)
            if len(elev) > 1:
                raise Exception(
                    "First page of airport info (%s) does not contain exactly ONE elevation in ft" % (icao,)
                )
            if len(elev) == 0:
                print "Couldn't find elev for ", icao
            ad["elev"] = int(elev[0])
            freqs = []
            found = False
            thrs = []
            # uprint("-------------------------------------")
            for pagenr in xrange(p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)
                # uprint("Looking on page %d"%(pagenr,))
                if (
                    0
                ):  # opening hours are no longer stored in a separate document for any airports. No need to detect which any more (since none are).
                    for item in page.get_by_regex(".*OPERATIONAL HOURS.*"):
                        lines = page.get_lines(page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))
                        for line in lines:
                            things = ["ATS", "Fuelling", "Operating"]
                            if not line.count("AIP SUP"):
                                continue
                            for thing in things:
                                if line.count(thing):
                                    ad["aipsup"] = True

                for item in page.get_by_regex(".*\s*RUNWAY\s*PHYSICAL\s*CHARACTERISTICS\s*.*"):
                    # uprint("Physical char on page")
                    lines = page.get_lines(page.get_partially_in_rect(0, item.y2 + 0.1, 100, 100))
                    seen_end_rwy_text = False
                    for line, nextline in izip(lines, lines[1:] + [None]):
                        # uprint("MAtching: <%s>"%(line,))
                        if re.match(ur"AD\s+2.13", line):
                            break
                        if line.count("Slope of"):
                            break
                        if line.lower().count("end rwy:"):
                            seen_end_rwy_text = True
                        if line.lower().count("bgn rwy:"):
                            seen_end_rwy_text = True
                        m = re.match(ur".*(\d{6}\.\d+)[\s\(\)\*]*(N).*", line)
                        if not m:
                            continue
                        m2 = re.match(ur".*(\d{6,7}\.\d+)\s*[\s\(\)\*]*(E).*", nextline)
                        if not m2:
                            continue
                        latd, n = m.groups()
                        lond, e = m2.groups()
                        assert n == "N"
                        assert e == "E"
                        lat = latd + n
                        lon = lond + e
                        rwytxts = page.get_lines(page.get_partially_in_rect(0, line.y1 + 0.05, 12, nextline.y2 - 0.05))
                        uprint("Rwytxts:", rwytxts)
                        rwy = None
                        for rwytxt in rwytxts:
                            # uprint("lat,lon:%s,%s"%(lat,lon))
                            # uprint("rwytext:",rwytxt)
                            m = re.match(ur"\s*(\d{2}[LRCM]?)\b.*", rwytxt)
                            if m:
                                assert rwy == None
                                rwy = m.groups()[0]
                        if rwy == None and seen_end_rwy_text:
                            continue
                        print "Cur airport:", icao
                        already = False
                        assert rwy != None
                        seen_end_rwy_text = False
                        for thr in thrs:
                            if thr["thr"] == rwy:
                                raise Exception("Same runway twice on airfield:" + icao)
                        thrs.append(dict(pos=mapper.parse_coords(lat, lon), thr=rwy))
            assert len(thrs) >= 2
            for pagenr in xrange(0, p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)

                matches = page.get_by_regex(r".*ATS\s+COMMUNICATION\s+FACILITIES.*")
                # print "Matches of ATS COMMUNICATION FACILITIES on page %d: %s"%(pagenr,matches)
                if len(matches) > 0:
                    commitem = matches[0]
                    curname = None

                    callsign = page.get_by_regex_in_rect(ur"Call\s*sign", 0, commitem.y1, 100, commitem.y2 + 8)[0]

                    for idx, item in enumerate(
                        page.get_lines(
                            page.get_partially_in_rect(callsign.x1 - 0.5, commitem.y1, 100, 100),
                            fudge=0.3,
                            order_fudge=15,
                        )
                    ):
                        if item.strip() == "":
                            curname = None
                        if re.match(".*RADIO\s+NAVIGATION\s+AND\s+LANDING\s+AIDS.*", item):
                            break
                        # print "Matching:",item
                        m = re.match(r"(.*?)\s*(\d{3}\.\d{1,3})\s*MHz.*", item)
                        # print "MHZ-match:",m
                        if not m:
                            continue
                        # print "MHZ-match:",m.groups()
                        who, sfreq = m.groups()
                        freq = float(sfreq)
                        if abs(freq - 121.5) < 1e-4:
                            if who.strip():
                                curname = who
                            continue  # Ignore emergency frequency, it is understood
                        if not who.strip():
                            if curname == None:
                                continue
                        else:
                            curname = who
                        freqs.append((curname.strip().rstrip("/"), freq))

            # Scan the PDF page by page for the "ATS AIRSPACE" table and parse
            # the aerodrome's airspace definitions out of it.  Processing stops
            # after the first page on which the table is found.
            for pagenr in xrange(0, p.get_num_pages()):
                page = p.parse_page_to_items(pagenr)

                matches = page.get_by_regex(r".*ATS\s*AIRSPACE.*")
                # print "Matches of ATS_AIRSPACE on page %d: %s"%(pagenr,matches)
                if len(matches) > 0:
                    heading = matches[0]
                    # Column anchors of the table; the trailing-comma unpack
                    # asserts exactly one occurrence of each label on the page.
                    desigitem, = page.get_by_regex("Designation and lateral limits")
                    vertitem, = page.get_by_regex("Vertical limits")
                    airspaceclass, = page.get_by_regex("Airspace classification")

                    lastname = None
                    subspacelines = dict()
                    subspacealts = dict()
                    # Walk the text to the right of the "Designation ..." label
                    # down to the "Vertical limits" row, accumulating the raw
                    # coordinate strings per subspace name.
                    for idx, item in enumerate(
                        page.get_lines(page.get_partially_in_rect(desigitem.x2 + 1, desigitem.y1, 100, vertitem.y1 - 1))
                    ):

                        # Some aerodromes publish no ATS airspace; that note
                        # must then be the very first line of the field.
                        if item.count("ATS airspace not established"):
                            assert idx == 0
                            break

                        if item.strip() == "":
                            continue
                        # A line optionally starts a new subspace name followed
                        # by its first coordinate pair ("ddmmssN ...").
                        m = re.match(r"(.*?)(\d{6}N\s+.*)", item)
                        if m:
                            name, coords = m.groups()
                            name = name.strip()
                        else:
                            name = item.strip()
                            coords = None
                        if name:
                            lastname = name
                        if coords:
                            # Continuation lines carry no name; append their
                            # coordinates to the most recently seen subspace.
                            subspacelines.setdefault(lastname, []).append(coords)
                        assert lastname
                    lastname = None

                    # print "Spaces:",subspacelines
                    # print "ICAO",ad['icao']
                    # altlines=page.get_lines(page.get_partially_in_rect(vertitem.x2+1,vertitem.y1,100,airspaceclass.y1-0.2))

                    # print "Altlines:",altlines
                    subspacealts = dict()
                    subspacekeys = subspacelines.keys()

                    # Full text of the "Vertical limits" column, used to detect
                    # the single-subspace case below.
                    allaltlines = " ".join(
                        page.get_lines(
                            page.get_partially_in_rect(
                                vertitem.x1 + 0.5, vertitem.y1 + 0.5, 100, airspaceclass.y1 - 0.2
                            )
                        )
                    )
                    single_vertlim = False
                    totalts = list(mapper.parse_all_alts(allaltlines))
                    # print "totalts:",totalts
                    # Exactly two altitudes (one ceiling + one floor) in the
                    # whole column means one shared vertical limit for all.
                    if len(totalts) == 2:
                        single_vertlim = True

                    for subspacename in subspacekeys:
                        ceil = None
                        floor = None
                        # Besides the full name, also try just the trailing
                        # designator (e.g. "TIZ"), since the limits column may
                        # abbreviate the subspace name that way.
                        subnames = [subspacename]
                        if subspacename.split(" ")[-1].strip() in ["TIA", "TIZ", "CTR", "CTR/TIZ"]:
                            subnames.append(subspacename.split(" ")[-1].strip())
                        # print "Parsing alts for ",subspacename,subnames
                        try:
                            for nametry in subnames:
                                if (
                                    single_vertlim
                                ):  # there's only one subspace, parse all of vertical limits field for this single one.
                                    items = [vertitem]
                                else:
                                    items = page.get_by_regex_in_rect(
                                        nametry, vertitem.x2 + 1, vertitem.y1, 100, airspaceclass.y1 - 0.2
                                    )
                                for item in items:
                                    alts = []
                                    for line in page.get_lines(
                                        page.get_partially_in_rect(
                                            item.x1 + 0.5, item.y1 + 0.5, 100, airspaceclass.y1 - 0.2
                                        )
                                    ):
                                        # print "Parsing:",line
                                        line = line.replace(nametry, "").lower().strip()
                                        parsed = list(mapper.parse_all_alts(line))
                                        if len(parsed):
                                            alts.append(mapper.altformat(*parsed[0]))
                                        if len(alts) == 2:
                                            break
                                    if alts:
                                        # print "alts:",alts
                                        # Assumes the upper limit is printed
                                        # above the lower one, so the first
                                        # parsed altitude is the ceiling --
                                        # TODO confirm against a sample chart.
                                        ceil, floor = alts
                                        # StopIteration is (ab)used here as a
                                        # multi-level break out of the nested
                                        # loops (caught just below).
                                        raise StopIteration
                        except StopIteration:
                            pass
                        assert ceil and floor
                        subspacealts[subspacename] = dict(ceil=ceil, floor=floor)

                    spaces = []
                    for spacename in subspacelines.keys():
                        altspacename = spacename
                        # print "Altspacename: %s, subspacesalts: %s"%(altspacename,subspacealts)
                        space = dict(
                            name=spacename,
                            ceil=subspacealts[altspacename]["ceil"],
                            floor=subspacealts[altspacename]["floor"],
                            points=parse_coord_str(" ".join(subspacelines[spacename])),
                            freqs=list(set(freqs)),
                        )

                        # Sanity check: polygon area in mercator zoom-13 pixel
                        # units must exceed 30x30, otherwise the coordinates
                        # were probably mis-parsed.
                        # NOTE(review): `p` is rebound in this loop, shadowing
                        # the PDF-parser `p` used by the page loop above --
                        # harmless only because we break out right afterwards.
                        if True:
                            vs = []
                            for p in space["points"]:
                                x, y = mapper.latlon2merc(mapper.from_str(p), 13)
                                vs.append(Vertex(int(x), int(y)))
                            p = Polygon(vvector(vs))
                            if p.calc_area() <= 30 * 30:
                                pass  # print space
                                pass  # print "Area:",p.calc_area()
                            assert p.calc_area() > 30 * 30
                            # print "Area: %f"%(p.calc_area(),)

                        spaces.append(space)
                        # print space
                    ad["spaces"] = spaces
                    found = True
                # `found` is presumably initialised to False before this loop
                # (not visible in this chunk) -- verify upstream.
                if found:
                    break
            assert found
            ad["runways"] = rwy_constructor.get_rwys(thrs)
Example #58
0
import pprint

from pascal_loader.main_io import PascalFile
from parse import Parser
from emulator import Emulator

if __name__ == '__main__':
    # Driver script: tokenize a Pascal source file, parse the tokens into a
    # byte array, then run it in the emulator.
    # (pprint is stdlib; previously used here without being imported, which
    # raised NameError at runtime -- now imported at module level.)
    pretty_printer = pprint.PrettyPrinter()

    # UNCOMMENT the below statements one at a time to try other inputs:

    # tokens = get_token(PascalFile(input_file_location='simple_assignment.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='complex_assignments.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='control_repeat.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='control_while.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='control_if.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='control_for.pas', output_location=''))
    # tokens = get_token(PascalFile(input_file_location='case_statement.pas', output_location=''))
    # NOTE(review): `get_token` is not imported by any visible import in this
    # file -- confirm which module (presumably the tokenizer) provides it.
    tokens = get_token(PascalFile(input_file_location='arrays.pas', output_location=''))

    # Uncomment to inspect the generated token stream:
    # pretty_printer.pprint(tokens)
    # Parenthesized print of a single argument is valid in both Python 2 and 3.
    print('----------------------------------')
    # Passing verbose=True to the parser prints tokens as they are matched,
    # plus warnings:
    # parser = Parser(token_list=tokens, verbose=True)
    parser = Parser(token_list=tokens)
    byte_array = parser.parse()
    # Uncomment to inspect the generated byte array:
    # pretty_printer.pprint(byte_array)
    print('----------------------------------')
    emulator = Emulator(byte_array)
    emulator.start()