コード例 #1
0
ファイル: processing.py プロジェクト: rizac/stream2segment
def get_inventory(segment, session=None, **kwargs):
    """Return the inventory of the station owning `segment`, as built by `loads_inv`.

    The inventory data stored on the station (`inventory_xml`) is used when present.
    Otherwise the data is fetched with `url_read` from the url returned by
    `get_inventory_query` for that station.

    :param segment: object exposing the station as `segment.channel.station`
    :param session: if truthy (e.g. a valid sqlalchemy session object), freshly downloaded
        data is stored on the station (after being passed through `dumps_inv`) and
        `session.commit()` is called. If None (the default), nothing is saved
    :param kwargs: keyword arguments forwarded as they are to `url_read`
    :raises ValueError: if the download returned no data. Exceptions raised while
        downloading, committing or parsing are not caught here and propagate to the caller
    """
    station = segment.channel.station

    stored = station.inventory_xml
    if stored:
        # already available: no download (and thus no commit) needed
        return loads_inv(stored)

    downloaded = url_read(get_inventory_query(station), **kwargs)
    if not downloaded:
        raise ValueError("No data from server")

    if session:
        station.inventory_xml = dumps_inv(downloaded)
        session.commit()

    return loads_inv(downloaded)
コード例 #2
0
ファイル: processing.py プロジェクト: rizac/stream2segment
def process_all(session, segments_model_instances, run_id,
                notify_progress_func=lambda *a, **v: None, **processing_args):
    """Process all given segments, station by station, saving the results in batches.

    Segments are grouped by `seg.channel.station_id` so that the inventory of each station
    is loaded only once (via `get_inventory`, with a timeout of 30). If the inventory of a
    station cannot be obtained, a warning is logged and all segments of that station are
    skipped. Every other segment is passed to `process`: on success the resulting
    `models.Processing` instance is queued, on failure a warning is logged and the segment
    is skipped. Queued instances are added to the session and committed every
    `max_session_new` items, plus once at the end for the remaining ones.

    :param session: the sqlalchemy session used for saving inventories and results
    :param segments_model_instances: the segments to process. Must support `len()` and
        iteration
    :param run_id: the value assigned to `run_id` of each created `models.Processing`
    :param notify_progress_func: callable invoked with an integer argument: the number of
        segments skipped because of a missing inventory, or 1 right before each segment is
        processed. The default does nothing
    :param processing_args: keyword arguments forwarded as they are to `process`
    :return: the list of `models.Processing` instances of the last, possibly incomplete,
        batch only: the list is emptied each time a full batch is sent to the session,
        so it is empty when no instance is left after the last full batch
    """
    # FIXME: standard error is not redirected/captured here. Capturing it segment-wise
    # would be great, but how much effort and how much performance decrease?

    # set after how many processed segments we want to commit. Setting it higher might speed
    # up calculations at the expense of losing max_session_new segments if just one is wrong
    max_session_new = 10

    def warn_dropped(count):
        """Return the `on_exc` callback logging that `count` results were not saved."""
        return lambda exc: logger.warning(msgs.db.dropped_seg(count, None, exc))

    # commit for safety:
    commit(session, on_exc=lambda exc: logger.error(str(exc)))

    calculated = 0
    saved = 0

    logger.info("Processing %d segments", len(segments_model_instances))
    ret = []

    sta2segs = defdict(list)
    for seg in segments_model_instances:
        sta2segs[seg.channel.station_id].append(seg)

    # process segments station-wise, so that we load only one inventory at a time
    # and hopefully it will be garbage collected (the inventory object is big)
    for sta_id, segments in sta2segs.items():
        inventory = None
        try:
            inventory = get_inventory(segments[0], session, timeout=30)
        except SQLAlchemyError as exc:
            logger.warning("Error while saving inventory (station id=%s), "
                           "%d segment will not be processed: %s",
                           str(sta_id), len(segments), str(exc))
            session.rollback()
        except (urllib2.HTTPError, urllib2.URLError, httplib.HTTPException,
                socket.error) as exc:
            # get_inventory builds its query url from the station, not from the segment:
            # do the same here so that the logged URL is the one actually requested
            logger.warning("Error while downloading inventory (station id=%s), "
                           "%d segment will not be processed: %s URL: %s",
                           str(sta_id), len(segments), str(exc),
                           get_inventory_query(segments[0].channel.station))
        except Exception as exc:  # pylint:disable=broad-except
            logger.warning("Error while creating inventory (station id=%s), "
                           "%d segment will not be processed: %s",
                           str(sta_id), len(segments), str(exc))

        if inventory is None:
            # no inventory: skip the whole station, still notifying the progress
            notify_progress_func(len(segments))
            continue

        for seg in segments:
            notify_progress_func(1)
            pro = models.Processing(run_id=run_id)
            try:
                pro = process(pro, seg, seg.channel, seg.channel.station, seg.event,
                              seg.datacenter, inventory, **processing_args)
                pro.id = None
                pro.segment = seg
                calculated += 1
                ret.append(pro)
                if len(ret) >= max_session_new:
                    # full batch: send it to the session and start a new one
                    added = len(ret)
                    session.add_all(ret)
                    ret = []
                    if commit(session, on_exc=warn_dropped(added)):
                        saved += added
            except Exception as exc:  # pylint:disable=broad-except
                logger.warning(msgs.calc.dropped_seg(seg, "segments processing", exc))

    # save the last (incomplete) batch. It must be added to the session explicitly, as done
    # for the full batches above: otherwise saving it would rely on relationship cascades
    added = len(ret)
    if added:
        try:
            session.add_all(ret)
        except Exception as exc:  # pylint:disable=broad-except
            logger.warning(msgs.db.dropped_seg(added, None, exc))
        else:
            if commit(session, on_exc=warn_dropped(added)):
                saved += added
    logger.info("")
    logger.info("%d segments successfully processed, %d succesfully saved", calculated, saved)
    return ret