Code example #1
    def fetch(self):
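      # Fetch Mapillary map features for the mapped traffic-sign values in
      # batches of 10, keep only detections inside the polygon, and write
      # them to a temporary CSV file whose path is returned.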
      fd, tmp_file = tempfile.mkstemp()
      os.close(fd)  # only the path is used below; close the raw descriptor

      pip = PointInPolygon(self.polygon_id, 60)

      traffic_signs = []
      with open(self.mapping, 'r') as mapping_file:
        reader = json.loads(mapping_file.read())
      try:
        for row in reader:
          traffic_signs += row['object']
      except Exception:
        self.logger.err(row)
        raise

      with open(tmp_file, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['accuracy', 'direction', 'image_key', 'first_seen_at', 'last_seen_at', 'value', 'X', 'Y'])

      # split list A into chunks of n items (note: shadows the built-in slice())
      slice = lambda A, n: [A[i:i+n] for i in range(0, len(A), n)]

      bboxes = pip.bboxes()

      start_time = (datetime.today() - timedelta(days=365*2)).isoformat()[0:10]
      b = 0
      for traffic_signs_ in slice(traffic_signs, 10):
        b = b + 1
        # integer ceiling of len/10; round(x + 0.5) misfires on .5 values under banker's rounding
        self.logger.log('Batch {0}/{1}: {2}'.format(b, (len(traffic_signs) + 9) // 10, ','.join(traffic_signs_)))
        for bbox in bboxes:
          url = 'https://a.mapillary.com/v3/map_features?bbox={bbox}&client_id={client_id}&layers={layer}&per_page=1000&start_time={start_time}&values={values}'.format(bbox=','.join(map(str, bbox)), layer=self.layer, client_id='MEpmMTFQclBTUWlacjV6RTUxWWMtZzo5OTc2NjY2MmRiMDUwYmMw', start_time=start_time, values=','.join(traffic_signs_))
          self.logger.log(url)
          with open(tmp_file, 'a') as csvfile:
            writer = csv.writer(csvfile)

            r = None
            page = 0
            while url:
              page = page + 1
              self.logger.log("Page {0}".format(page))
              r = downloader.get(url)
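              # requests parses the HTTP Link header into r.links;
              # follow rel="next" until pagination is exhausted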
              url = r.links['next']['url'] if 'next' in r.links else None

              features = r.json()['features']
              filtered = 0
              self.logger.log('{0} features fetched'.format(len(features)))
              for j in features:
                p = j['properties']
                image_key = p['detections'][0]['image_key']
                gc = j['geometry']['coordinates']
                row = [p['accuracy'], p['direction'] if 'direction' in p else None, image_key, p['first_seen_at'], p['last_seen_at'], p['value']] + gc
                if row[0] > 0.01 and pip.point_inside_polygon(gc[0], gc[1]):
                  writer.writerow(row)
                  filtered = filtered + 1
              self.logger.log('{0} kept'.format(filtered))

      return tmp_file
Code example #2
    def fetch(self, url, tmp_file, date_string=None):
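      # Same Mapillary fetch as example #1, but writing to the caller-supplied
      # tmp_file and returning True instead of the file path (uses math.ceil
      # for the batch count).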
      pip = PointInPolygon(self.polygon_id, 60)

      traffic_signs = []
      with open(self.mapping, 'r') as mapping_file:
        reader = json.loads(mapping_file.read())
      try:
        for row in reader:
          traffic_signs += row['object']
      except Exception:
        self.logger.err(row)
        raise

      with open(tmp_file, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['accuracy', 'direction', 'image_key', 'first_seen_at', 'last_seen_at', 'value', 'X', 'Y'])

      # split list A into chunks of n items (note: shadows the built-in slice())
      slice = lambda A, n: [A[i:i+n] for i in range(0, len(A), n)]

      bboxes = pip.bboxes()

      start_time = (datetime.today() - timedelta(days=365*2)).isoformat()[0:10]
      b = 0
      for traffic_signs_ in slice(traffic_signs, 10):
        b = b + 1
        self.logger.log('Batch {0}/{1}: {2}'.format(b, ceil(len(traffic_signs) / 10.0), ','.join(traffic_signs_)))
        for bbox in bboxes:
          url = 'https://a.mapillary.com/v3/map_features?bbox={bbox}&client_id={client_id}&layers={layer}&per_page=1000&start_time={start_time}&values={values}'.format(bbox=','.join(map(str, bbox)), layer=self.layer, client_id='MEpmMTFQclBTUWlacjV6RTUxWWMtZzo5OTc2NjY2MmRiMDUwYmMw', start_time=start_time, values=','.join(traffic_signs_))
          self.logger.log(url)
          with open(tmp_file, 'a') as csvfile:
            writer = csv.writer(csvfile)

            r = None
            page = 0
            while url:
              page = page + 1
              self.logger.log("Page {0}".format(page))
              r = downloader.get(url)
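              # follow the rel="next" pagination link parsed from the
              # HTTP Link header, as in example #1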
              url = r.links['next']['url'] if 'next' in r.links else None

              features = r.json()['features']
              filtered = 0
              self.logger.log('{0} features fetched'.format(len(features)))
              for j in features:
                p = j['properties']
                image_key = p['detections'][0]['image_key']
                gc = j['geometry']['coordinates']
                row = [p['accuracy'], p['direction'] if 'direction' in p else None, image_key, p['first_seen_at'], p['last_seen_at'], p['value']] + gc
                if row[0] > 0.01 and pip.point_inside_polygon(gc[0], gc[1]):
                  writer.writerow(row)
                  filtered = filtered + 1
              self.logger.log('{0} kept'.format(filtered))

      return True
Code example #3
File: osmose_run.py Project: nuxper/osmose-backend
def execc(conf, logger, analysers, options, osmosis_manager):
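    # Prepare the OSM extract and database, run each requested analyser,
    # upload the resulting issue files to the frontend, and return a
    # bitmask error code (0 on full success).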
    err_code = 0

    ## download and create database

    xml_change = None
    if not options.skip_init:
        if options.change and osmosis_manager.check_change(conf) and not options.change_init:
            xml_change = osmosis_manager.run_change(conf)

        elif "url" in conf.download:
            newer = False
            xml_change = None

            if not newer and options.skip_download:
                logger.sub().log("skip download")
                newer = True

            if not newer and options.diff and os.path.exists(conf.download["dst"]):
                status = False
                if options.pbf_update_tool == 'osmosis':
                    if osmosis_manager.check_osmosis_diff(conf):
                        (status, xml_change) = osmosis_manager.run_osmosis_diff(conf)
                else:
                    if osmosis_manager.check_osmium_diff(conf):
                        (status, xml_change) = osmosis_manager.run_osmium_diff(conf)
                if status:
                    newer = True

            if not newer:
                logger.log(logger.log_av_r+u"downloading"+logger.log_ap)
                newer = download.dl(conf.download["url"], conf.download["dst"], logger.sub(),
                                    min_file_size=8*1024)

                if newer and options.diff:
                    if options.pbf_update_tool == 'osmosis':
                        osmosis_manager.init_osmosis_diff(conf)
                    if "/minute/" in conf.download["diff"] or "/hour/" in conf.download["diff"]:
                        # update extract with any more recent available diff
                        if options.pbf_update_tool == 'osmosis':
                            osmosis_manager.run_osmosis_diff(conf)
                        else:
                            osmosis_manager.run_osmium_diff(conf)

            if not newer:
                return 0x11

            if osmosis_manager:
                osmosis_manager.init_database(conf)

            if options.change:
                osmosis_manager.init_change(conf)

        if hasattr(conf, "sql_post_scripts"):
            logger.log(logger.log_av_r+"import post scripts"+logger.log_ap)
            for script in conf.sql_post_scripts:
                osmosis_manager.psql_f(script)

        if osmosis_manager:
            osmosis_manager.update_metainfo(conf)

        if options.resume:
            osmosis_manager.run_resume(conf)

    ##########################################################################
    ## analyses

    version = get_version()

    lunched_analyser = []
    lunched_analyser_change = []
    lunched_analyser_resume = []

    for analyser in analysers:
        if not options.analyser and analyser not in conf.analyser:
            continue

        logger.log(logger.log_av_r + conf.country + " : " + analyser + logger.log_ap)

        password = conf.analyser.get(analyser)

        if not options.skip_upload and (not password or password == "xxx"):
            logger.sub().err("No password to upload result to %s" % conf.updt_url)

        try:
            analyser_conf = analyser_config(conf, options, osmosis_manager, xml_change)

            for name, obj in inspect.getmembers(analysers[analyser]):
                if (inspect.isclass(obj) and obj.__module__ == "analysers.analyser_" + analyser and
                    (name.startswith("Analyser") or name.startswith("analyser"))):
                    analyser_name = name[len("Analyser_"):]
                    resume = options.resume or (options.resume_analyser and analyser in options.resume_analyser)

                    dst = os.path.join(conf.dir_results, name + "-" + conf.country)
                    analyser_conf.error_file = issues_file_from_fromat(dst, options.result_format, bz2 = True, version = version, polygon_id = analyser_conf.polygon_id)

                    # analyse
                    if not options.skip_analyser:
                        with obj(analyser_conf, logger.sub()) as analyser_obj:
                            remote_timestamp = None
                            remote_analyser_version = None
                            if not options.skip_frontend_check:
                                url = modules.config.url_frontend_update + "/../../control/status/%s/%s?%s" % (conf.country, analyser_name, 'objects=true' if resume else '')
                                resp = downloader.get(url)
                                if not resp.ok:
                                    logger.sub().err("Fails to get status from frontend: {0}".format(resp.status_code))
                                else:
                                    try:
                                        status = resp.json()
                                        remote_timestamp = dateutil.parser.parse(status['timestamp']) if status else None
                                        remote_analyser_version = int(status['analyser_version'])
                                    except Exception as e:
                                        logger.sub().err(e)

                            if analyser_obj.timestamp() and remote_timestamp and analyser_obj.timestamp() <= remote_timestamp and analyser_obj.analyser_version() == remote_analyser_version:
                                logger.sub().warn("Skip, frontend is already up to date")
                                continue

                            if resume and remote_timestamp and analyser_obj.analyser_version() == remote_analyser_version:
                                already_issued_objects = {'N': status['nodes'] or [], 'W': status['ways'] or [], 'R': status['relations'] or []}
                                analyser_obj.analyser_resume(remote_timestamp, already_issued_objects)
                                lunched_analyser_resume.append([obj, analyser_conf])
                            else:
                                if resume:
                                    if not remote_timestamp:
                                        logger.sub().err("No remote timestamp to resume from, start a full run")
                                    elif analyser_obj.analyser_version() != remote_analyser_version:
                                        logger.sub().err("Analyser version changed, start a full run")

                                if not options.change or not xml_change:
                                    analyser_obj.analyser()
                                    lunched_analyser.append([obj, analyser_conf])
                                else:
                                    analyser_obj.analyser_change()
                                    lunched_analyser_change.append([obj, analyser_conf])

                    # update
                    if not options.skip_upload and password != "xxx":
                        logger.sub().log("update")

                        if analyser in conf.analyser_updt_url:
                            list_urls = conf.analyser_updt_url[analyser]
                        else:
                            list_urls = [conf.updt_url]

                        for url in list_urls:
                            update_finished = False
                            nb_iter = 0
                            was_on_timeout = False
                            while not update_finished and nb_iter < 3:
                                time.sleep(nb_iter * 15)
                                nb_iter += 1
                                logger.sub().sub().log("iteration=%d" % nb_iter)
                                try:
                                    u = url + '?analyser=' + analyser_name + '&country=' + conf.country
                                    r = requests.post(u, timeout=1800, data={
                                        'analyser': analyser_name,
                                        'country': conf.country,
                                        'code': password
                                    }, files={
                                        'content': open(analyser_conf.error_file.dst, 'rb')
                                    })
                                    r.raise_for_status()
                                    logger.sub().sub().log(r.text.strip())
                                    update_finished = True
                                except requests.exceptions.HTTPError as e:
                                    if e.response.status_code == 504:
                                        was_on_timeout = True
                                        logger.sub().sub().sub().err('got an HTTP timeout status')
                                    else:
                                        dt = r.text.strip()
                                        logger.sub().sub().sub().err(u"UPDATE ERROR %s/%s : %s\n" % (conf.country, analyser_name, dt))
                                        if dt == "FAIL: Already up to date":
                                            update_finished = True
                                        if not was_on_timeout:
                                            err_code |= 4
                                except Exception as e:
                                    if isinstance(e, requests.exceptions.ConnectTimeout):
                                        was_on_timeout = True
                                        logger.sub().sub().sub().err('got a connection timeout')
                                    else:
                                        tb = traceback.format_exc()
                                        logger.sub().err('error on update...')
                                        for l in tb.splitlines():
                                            logger.sub().sub().log(l)

                        if not update_finished:
                            err_code |= 1

        except Exception:
            tb = traceback.format_exc()
            logger.sub().err("error on analyse {0}...".format(analyser))
            for l in tb.splitlines():
                logger.sub().sub().log(l)
            err_code |= 2
            continue

    if not options.no_clean:
        for (obj, analyser_conf) in lunched_analyser:
            analyser_conf.error_file = None
            with obj(analyser_conf, logger.sub()) as analyser_obj:
                analyser_obj.analyser_deferred_clean()
        for (obj, analyser_conf) in lunched_analyser_change:
            analyser_conf.error_file = None
            with obj(analyser_conf, logger.sub()) as analyser_obj:
                analyser_obj.analyser_deferred_clean()
        for (obj, analyser_conf) in lunched_analyser_resume:
            analyser_conf.error_file = None
            with obj(analyser_conf, logger.sub()) as analyser_obj:
                analyser_obj.analyser_deferred_clean()

    return err_code
Code example #4
def dl(url, local, logger=OsmoseLog.logger(), min_file_size=10 * 1024):
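    # Conditional download: fetch url into local only when remotely modified
    # (If-Modified-Since), bunzip2 or osmconvert the payload when the
    # extensions differ, and record the Last-Modified timestamp in a .ts file.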

    unzip = False
    convert_pbf = False

    # file names
    file_ts = local + ".ts"
    url_ext = os.path.splitext(url)[1]
    local_ext = os.path.splitext(local)[1]
    if (url_ext in [".bz2"]) and (local_ext not in [".bz2"]):
        file_dl = local + url_ext
        unzip = True
    elif (url_ext in [".pbf"]) and (local_ext not in [".pbf"]):
        file_dl = local + url_ext
        convert_pbf = True
    else:
        file_dl = local

    headers = {}

    # make the download conditional
    if os.path.exists(file_dl) and os.path.exists(file_ts):
        headers["If-Modified-Since"] = open(file_ts).read()

    # request fails with a 304 error when the file wasn't modified
    # Retry on 404, workaround Geofabrik update in progress
    answer = downloader.get(
        url,
        headers=headers,
        session=downloader.requests_retry_session(
            status_forcelist=downloader.DEFAULT_RETRY_ON + (404, )))
    if answer.status_code == 304:
        logger.log(u"not newer")
        return False
    if not answer.ok:
        logger.log(u"got error %d" % answer.status_code)
        logger.log(u"  URL=%s" % url)
        answer.raise_for_status()

    url_ts = answer.headers.get('Last-Modified')

    file_size = int(answer.headers.get('Content-Length'))
    if file_size < min_file_size:
        # reject files smaller than min_file_size (likely a truncated download)
        logger.log("File is not big enough: %d B" % file_size)
        raise SystemError

    # write the file
    with open(file_dl, "wb") as outfile:
        for data in answer.iter_content(chunk_size=None):
            outfile.write(data)

    if not answer.headers.get(
            'Content-Encoding') and file_size != os.path.getsize(file_dl):
        logger.log(
            u"error: Download file (%d) not of the expected size (%d) for %s" %
            (os.path.getsize(file_dl), file_size, url))
        os.remove(file_dl)
        return False

    # uncompress
    if unzip:
        logger.log(u"bunzip2")
        subprocess.check_output(['bunzip2', '-f', file_dl])

    # convert pbf to osm
    if convert_pbf:
        logger.log(u"osmconvert")
        subprocess.check_output("{} {} > {}".format(config.bin_osmconvert,
                                                    file_dl, local),
                                shell=True)
        os.remove(file_dl)

    # set timestamp (Last-Modified may be absent from the response headers)
    if url_ts:
        with open(file_ts, "w") as ts_file:
            ts_file.write(url_ts)

    return True
Code example #5
File: osmose_run.py Project: samatht/osmose-backend
def execc(conf, logger, options, osmosis_manager):
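    # Older execc() variant: same download / analyse / upload pipeline as
    # example #3, iterating directly over conf.analyser and its per-analyser
    # upload passwords.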
    err_code = 0

    version = get_version()

    logger.log("osmose backend version: %s" % version)

    ## download and create database

    country = conf.country
    xml_change = None  # stays None unless a change or diff run produces one

    if options.skip_init:
        pass

    elif options.change and osmosis_manager.check_change(
            conf) and not options.change_init:
        xml_change = osmosis_manager.run_change(conf)

    elif "url" in conf.download:
        newer = False
        xml_change = None

        if not newer and options.skip_download:
            logger.sub().log("skip download")
            newer = True

        if not newer and options.diff and osmosis_manager.check_diff(
                conf) and os.path.exists(conf.download["dst"]):
            (status, xml_change) = osmosis_manager.run_diff(conf)
            if status:
                newer = True

        if not newer:
            logger.log(logger.log_av_r + u"downloading" + logger.log_ap)
            newer = download.dl(conf.download["url"],
                                conf.download["dst"],
                                logger.sub(),
                                min_file_size=8 * 1024)

            if newer and options.diff:
                osmosis_manager.init_diff(conf)
                if "/minute/" in conf.download[
                        "diff"] or "/hour/" in conf.download["diff"]:
                    # update extract with any more recent available diff
                    osmosis_manager.run_diff(conf)

        if not newer:
            return 0x11

        if osmosis_manager:
            osmosis_manager.init_database(conf)

        if options.change:
            osmosis_manager.init_change(conf)

    if hasattr(conf, "sql_post_scripts"):
        logger.log(logger.log_av_r + "import post scripts" + logger.log_ap)
        for script in conf.sql_post_scripts:
            osmosis_manager.psql_f(script)

    if not options.skip_init and osmosis_manager:
        osmosis_manager.update_metainfo(conf)

    if options.resume:
        osmosis_manager.run_resume(conf)

    ##########################################################################
    ## analyses

    lunched_analyser = []
    lunched_analyser_change = []
    lunched_analyser_resume = []

    for analyser, password in conf.analyser.items():
        logger.log(logger.log_av_r + country + " : " + analyser +
                   logger.log_ap)

        if "analyser_" + analyser not in analysers:
            logger.sub().log("skipped")
            continue

        if password == "xxx":
            logger.sub().log("code is not correct - won't upload to %s" %
                             conf.updt_url)

        try:
            analyser_conf = analyser_config()
            analyser_conf.dst_dir = conf.dir_results

            analyser_conf.osmosis_manager = osmosis_manager
            analyser_conf.db_user = conf.db_user
            if conf.db_schema:
                analyser_conf.db_schema = conf.db_schema
            else:
                analyser_conf.db_schema = country
            analyser_conf.db_schema_path = conf.db_schema_path

            analyser_conf.dir_scripts = conf.dir_scripts
            analyser_conf.options = conf.analyser_options
            analyser_conf.polygon_id = conf.polygon_id

            if options.change and xml_change:
                analyser_conf.src = xml_change
            elif "dst" in conf.download:
                analyser_conf.src = conf.download["dst"]
                if "diff_path" in conf.download:
                    analyser_conf.src_state = os.path.join(
                        conf.download["diff_path"], "state.txt")

            for name, obj in inspect.getmembers(analysers["analyser_" +
                                                          analyser]):
                if (inspect.isclass(obj)
                        and obj.__module__ == "analysers.analyser_" + analyser
                        and (name.startswith("Analyser")
                             or name.startswith("analyser"))):
                    analyser_name = name[len("Analyser_"):]
                    analyser_conf.dst_file = name + "-" + country + ".xml"
                    analyser_conf.dst_file += ".bz2"
                    analyser_conf.dst = os.path.join(conf.dir_results,
                                                     analyser_conf.dst_file)
                    analyser_conf.version = version
                    analyser_conf.verbose = options.verbose

                    # analyse
                    if not options.skip_analyser:
                        with obj(analyser_conf, logger.sub()) as analyser_obj:
                            remote_timestamp = None
                            if not options.skip_frontend_check:
                                url = modules.config.url_frontend_update + "/../../control/status/%s/%s?%s" % (
                                    country, analyser_name,
                                    'objects=true' if options.resume else '')
                                resp = downloader.get(url)
                                if not resp.ok:
                                    logger.sub().err(
                                        "Fails to get status from frontend: {0}"
                                        .format(resp.status_code))
                                else:
                                    try:
                                        status = resp.json()
                                        remote_timestamp = dateutil.parser.parse(
                                            status['timestamp']
                                        ) if status else None
                                    except Exception as e:
                                        logger.sub().err(e)

                            if options.resume:
                                if remote_timestamp:
                                    already_issued_objects = {
                                        'N': status['nodes'] or [],
                                        'W': status['ways'] or [],
                                        'R': status['relations'] or []
                                    }
                                    analyser_obj.analyser_resume(
                                        remote_timestamp,
                                        already_issued_objects)
                                    lunched_analyser_resume.append(
                                        [obj, analyser_conf])
                                    continue
                                else:
                                    logger.sub().err("Not able to resume")

                            if analyser_obj.timestamp(
                            ) and remote_timestamp and analyser_obj.timestamp(
                            ) <= remote_timestamp:
                                logger.sub().warn(
                                    "Skip, frontend is already up to date")
                                continue

                            if not options.change or not xml_change:
                                analyser_obj.analyser()
                                lunched_analyser.append([obj, analyser_conf])
                            else:
                                analyser_obj.analyser_change()
                                lunched_analyser_change.append(
                                    [obj, analyser_conf])

                    # update
                    if not options.skip_upload and password != "xxx":
                        logger.sub().log("update")

                        if analyser in conf.analyser_updt_url:
                            list_urls = conf.analyser_updt_url[analyser]
                        else:
                            list_urls = [conf.updt_url]

                        for url in list_urls:
                            update_finished = False
                            nb_iter = 0
                            was_on_timeout = False
                            while not update_finished and nb_iter < 3:
                                time.sleep(nb_iter * 15)
                                nb_iter += 1
                                logger.sub().sub().log("iteration=%d" %
                                                       nb_iter)
                                try:
                                    u = url + '?name=' + name + '&country=' + (
                                        conf.db_schema or conf.country)
                                    r = requests.post(
                                        u,
                                        timeout=1800,
                                        data={
                                            'analyser': analyser_name,
                                            'country': country,
                                            'code': password
                                        },
                                        files={
                                            'content':
                                            open(analyser_conf.dst, 'rb')
                                        })
                                    r.raise_for_status()

                                    dt = r.text.strip()
                                    if dt == "FAIL: Already up to date" and was_on_timeout:
                                        logger.sub().sub().sub().err(
                                            (u"UPDATE ERROR %s/%s : %s\n" %
                                             (country, analyser_name,
                                              dt)).encode("utf8"))
                                        # Log error, but do not set err_code
                                    elif dt[-2:] != "OK":
                                        logger.sub().sub().sub().err(
                                            (u"UPDATE ERROR %s/%s : %s\n" %
                                             (country, analyser_name,
                                              dt)).encode("utf8"))
                                        err_code |= 4
                                    else:
                                        logger.sub().sub().log(dt)
                                    update_finished = True
                                except Exception as e:
                                    if isinstance(
                                            e,
                                            requests.exceptions.ConnectTimeout
                                    ) or (isinstance(
                                            e, requests.exceptions.HTTPError)
                                          and e.response.status_code == 504):
                                        was_on_timeout = True
                                        logger.sub().sub().sub().err(
                                            'got a timeout')
                                    else:
                                        tb = traceback.format_exc()
                                        logger.sub().err('error on update...')
                                        for l in tb.splitlines():
                                            logger.sub().sub().log(l)

                        if not update_finished:
                            err_code |= 1

        except Exception:
            tb = traceback.format_exc()
            logger.sub().err("error on analyse {0}...".format(analyser))
            for l in tb.splitlines():
                logger.sub().sub().log(l)
            err_code |= 2
            continue

    if not options.no_clean:
        for (obj, analyser_conf) in lunched_analyser:
            analyser_conf.dst = None
            with obj(analyser_conf, logger.sub()) as analyser_obj:
                analyser_obj.analyser_deferred_clean()
        for (obj, analyser_conf) in lunched_analyser_change:
            analyser_conf.dst = None
            with obj(analyser_conf, logger.sub()) as analyser_obj:
                analyser_obj.analyser_deferred_clean()
        for (obj, analyser_conf) in lunched_analyser_resume:
            analyser_conf.dst = None
            with obj(analyser_conf, logger.sub()) as analyser_obj:
                analyser_obj.analyser_deferred_clean()

    return err_code
Code example #6
File: download.py Project: osm-fr/osmose-backend
def dl(url, local, logger=OsmoseLog.logger(), min_file_size=10*1024):
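    # Earlier revision of the dl() helper from example #4, without the
    # retry-on-404 download session.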

    unzip = False
    convert_pbf = False

    # file names
    file_ts = local+".ts"
    url_ext = os.path.splitext(url)[1]
    local_ext = os.path.splitext(local)[1]
    if (url_ext in [".bz2"]) and (local_ext not in [".bz2"]) :
        file_dl = local + url_ext
        unzip   = True
    elif (url_ext in [".pbf"]) and (local_ext not in [".pbf"]) :
        file_dl     = local + url_ext
        convert_pbf = True
    else:
        file_dl = local

    headers = {}

    # make the download conditional
    if os.path.exists(file_dl) and os.path.exists(file_ts):
        headers["If-Modified-Since"] = open(file_ts).read()

    # request fails with a 304 error when the file wasn't modified
    answer = downloader.get(url, headers=headers)
    if answer.status_code == 304:
        logger.log(u"not newer")
        return False
    if not answer.ok:
        logger.log(u"got error %d" % answer.status_code)
        logger.log(u"  URL=%s" % url)
        answer.raise_for_status()

    url_ts = answer.headers.get('Last-Modified')

    file_size = int(answer.headers.get('Content-Length'))
    if file_size < min_file_size:
        # reject files smaller than min_file_size (likely a truncated download)
        logger.log("File is not big enough: %d B" % file_size)
        raise SystemError

    # write the file
    with open(file_dl, "wb") as outfile:
        for data in answer.iter_content(chunk_size=None):
            outfile.write(data)

    if not answer.headers.get('Content-Encoding') and file_size != os.path.getsize(file_dl):
        logger.log(u"error: Download file (%d) not of the expected size (%d) for %s" % (os.path.getsize(file_dl), file_size, url))
        os.remove(file_dl)
        return False

    # uncompress
    if unzip:
        logger.log(u"bunzip2")
        subprocess.check_output(['bunzip2', '-f', file_dl])

    # convert pbf to osm
    if convert_pbf:
        logger.log(u"osmconvert")
        subprocess.check_output("{} {} > {}".format(config.bin_osmconvert, file_dl, local), shell=True)
        os.remove(file_dl)


    # set timestamp (Last-Modified may be absent from the response headers)
    if url_ts:
        with open(file_ts, "w") as ts_file:
            ts_file.write(url_ts)

    return True
Code example #7
File: osmose_run.py Project: osm-fr/osmose-backend
def execc(conf, logger, options, osmosis_manager):
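    # Functionally identical to example #5's execc(): prepare the extract,
    # run every configured analyser, and upload issue files to the frontend.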
    err_code = 0

    version = get_version()

    logger.log("osmose backend version: %s" % version)

    ## download and create database

    country = conf.country
    xml_change = None  # stays None unless a change or diff run produces one

    if options.skip_init:
        pass

    elif options.change and osmosis_manager.check_change(conf) and not options.change_init:
        xml_change = osmosis_manager.run_change(conf)

    elif "url" in conf.download:
        newer = False
        xml_change = None

        if not newer and options.skip_download:
            logger.sub().log("skip download")
            newer = True

        if not newer and options.diff and osmosis_manager.check_diff(conf) and os.path.exists(conf.download["dst"]):
            (status, xml_change) = osmosis_manager.run_diff(conf)
            if status:
                newer = True

        if not newer:
            logger.log(logger.log_av_r+u"downloading"+logger.log_ap)
            newer = download.dl(conf.download["url"], conf.download["dst"], logger.sub(),
                                min_file_size=8*1024)

            if newer and options.diff:
                osmosis_manager.init_diff(conf)
                if "/minute/" in conf.download["diff"] or "/hour/" in conf.download["diff"]:
                    # update extract with any more recent available diff
                    osmosis_manager.run_diff(conf)

        if not newer:
            return 0x11

        if osmosis_manager:
            osmosis_manager.init_database(conf)

        if options.change:
            osmosis_manager.init_change(conf)

    if hasattr(conf, "sql_post_scripts"):
        logger.log(logger.log_av_r+"import post scripts"+logger.log_ap)
        for script in conf.sql_post_scripts:
            osmosis_manager.psql_f(script)

    if not options.skip_init and osmosis_manager:
        osmosis_manager.update_metainfo(conf)

    if options.resume:
        osmosis_manager.run_resume(conf)

    ##########################################################################
    ## analyses

    lunched_analyser = []
    lunched_analyser_change = []
    lunched_analyser_resume = []

    for analyser, password in conf.analyser.items():
        logger.log(logger.log_av_r + country + " : " + analyser + logger.log_ap)

        if "analyser_" + analyser not in analysers:
            logger.sub().log("skipped")
            continue

        if password == "xxx":
            logger.sub().log("code is not correct - won't upload to %s" % conf.updt_url)

        try:
            analyser_conf = analyser_config()
            analyser_conf.dst_dir = conf.dir_results

            analyser_conf.osmosis_manager = osmosis_manager
            analyser_conf.db_user = conf.db_user
            if conf.db_schema:
                analyser_conf.db_schema = conf.db_schema
            else:
                analyser_conf.db_schema = country
            analyser_conf.db_schema_path = conf.db_schema_path

            analyser_conf.dir_scripts = conf.dir_scripts
            analyser_conf.options = conf.analyser_options
            analyser_conf.polygon_id = conf.polygon_id

            if options.change and xml_change:
                analyser_conf.src = xml_change
            elif "dst" in conf.download:
                analyser_conf.src = conf.download["dst"]
                if "diff_path" in conf.download:
                    analyser_conf.src_state = os.path.join(conf.download["diff_path"], "state.txt")

            for name, obj in inspect.getmembers(analysers["analyser_" + analyser]):
                if (inspect.isclass(obj) and obj.__module__ == "analysers.analyser_" + analyser and
                    (name.startswith("Analyser") or name.startswith("analyser"))):
                    analyser_name = name[len("Analyser_"):]
                    analyser_conf.dst_file = name + "-" + country + ".xml"
                    analyser_conf.dst_file += ".bz2"
                    analyser_conf.dst = os.path.join(conf.dir_results, analyser_conf.dst_file)
                    analyser_conf.version = version
                    analyser_conf.verbose = options.verbose

                    # analyse
                    if not options.skip_analyser:
                        with obj(analyser_conf, logger.sub()) as analyser_obj:
                            remote_timestamp = None
                            if not options.skip_frontend_check:
                                url = modules.config.url_frontend_update + "/../../control/status/%s/%s?%s" % (country, analyser_name, 'objects=true' if options.resume else '')
                                resp = downloader.get(url)
                                if not resp.ok:
                                    logger.sub().err("Fails to get status from frontend: {0}".format(resp.status_code))
                                else:
                                    try:
                                        status = resp.json()
                                        remote_timestamp = dateutil.parser.parse(status['timestamp']) if status else None
                                    except Exception as e:
                                        logger.sub().err(e)

                            if options.resume:
                                if remote_timestamp:
                                    already_issued_objects = {'N': status['nodes'] or [], 'W': status['ways'] or [], 'R': status['relations'] or []}
                                    analyser_obj.analyser_resume(remote_timestamp, already_issued_objects)
                                    lunched_analyser_resume.append([obj, analyser_conf])
                                    continue
                                else:
                                    logger.sub().err("Not able to resume")

                            if analyser_obj.timestamp() and remote_timestamp and analyser_obj.timestamp() <= remote_timestamp:
                                logger.sub().warn("Skip, frontend is already up to date")
                                continue

                            if not options.change or not xml_change:
                                analyser_obj.analyser()
                                lunched_analyser.append([obj, analyser_conf])
                            else:
                                analyser_obj.analyser_change()
                                lunched_analyser_change.append([obj, analyser_conf])

                    # update
                    if not options.skip_upload and password != "xxx":
                        logger.sub().log("update")

                        if analyser in conf.analyser_updt_url:
                            list_urls = conf.analyser_updt_url[analyser]
                        else:
                            list_urls = [conf.updt_url]

                        for url in list_urls:
                            update_finished = False
                            nb_iter = 0
                            was_on_timeout = False
                            while not update_finished and nb_iter < 3:
                                time.sleep(nb_iter * 15)
                                nb_iter += 1
                                logger.sub().sub().log("iteration=%d" % nb_iter)
                                try:
                                    u = url + '?name=' + name + '&country=' + (conf.db_schema or conf.country)
                                    r = requests.post(u, timeout=1800, data={
                                        'analyser': analyser_name,
                                        'country': country,
                                        'code': password
                                    }, files={
                                        'content': open(analyser_conf.dst, 'rb')
                                    })
                                    r.raise_for_status()

                                    dt = r.text.strip()
                                    if dt == "FAIL: Already up to date" and was_on_timeout:
                                        logger.sub().sub().sub().err((u"UPDATE ERROR %s/%s : %s\n"%(country, analyser_name, dt)).encode("utf8"))
                                        # Log error, but do not set err_code
                                    elif dt[-2:] != "OK":
                                        logger.sub().sub().sub().err((u"UPDATE ERROR %s/%s : %s\n"%(country, analyser_name, dt)).encode("utf8"))
                                        err_code |= 4
                                    else:
                                        logger.sub().sub().log(dt)
                                    update_finished = True
                                except Exception as e:
                                    if isinstance(e, requests.exceptions.ConnectTimeout) or (isinstance(e, requests.exceptions.HTTPError) and e.response.status_code == 504):
                                        was_on_timeout = True
                                        logger.sub().sub().sub().err('got a timeout')
                                    else:
                                        tb = traceback.format_exc()
                                        logger.sub().err('error on update...')
                                        for l in tb.splitlines():
                                            logger.sub().sub().log(l)

                        if not update_finished:
                            err_code |= 1

        except Exception:
            tb = traceback.format_exc()
            logger.sub().err("error on analyse {0}...".format(analyser))
            for l in tb.splitlines():
                logger.sub().sub().log(l)
            err_code |= 2
            continue

    if not options.no_clean:
        for (obj, analyser_conf) in lunched_analyser:
            analyser_conf.dst = None
            with obj(analyser_conf, logger.sub()) as analyser_obj:
                analyser_obj.analyser_deferred_clean()
        for (obj, analyser_conf) in lunched_analyser_change:
            analyser_conf.dst = None
            with obj(analyser_conf, logger.sub()) as analyser_obj:
                analyser_obj.analyser_deferred_clean()
        for (obj, analyser_conf) in lunched_analyser_resume:
            analyser_conf.dst = None
            with obj(analyser_conf, logger.sub()) as analyser_obj:
                analyser_obj.analyser_deferred_clean()

    return err_code