import socket
import httplib
import urllib2
from collections import defaultdict as defdict

from sqlalchemy.exc import SQLAlchemyError

# Project-level names assumed already importable in this module (their paths
# are project-specific and thus omitted here): logger, models, msgs, commit,
# process, get_inventory_query, url_read, dumps_inv, loads_inv.


def get_inventory(segment, session=None, **kwargs):
    """Returns the inventory object (as parsed by `loads_inv`) for the station
    of the given segment, reading it from the database or, if not stored there
    yet, downloading it from the station's inventory query url.

    :param segment: a segment model instance
    :param session: if **not** None but a valid sqlalchemy session object, then
        the inventory, if downloaded because not present, will be saved to the
        db (compressed)
    :param kwargs: optional keyword arguments passed to `url_read` (e.g.,
        `timeout`)
    :raise: ValueError if the server returns no data; also any url-related
        exception raised by `url_read` (see the caller `process_all`)
    """
    data = segment.channel.station.inventory_xml
    if not data:
        query_url = get_inventory_query(segment.channel.station)
        data = url_read(query_url, **kwargs)
        if session and data:
            segment.channel.station.inventory_xml = dumps_inv(data)
            session.commit()
        elif not data:
            raise ValueError("No data from server")
    return loads_inv(data)
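
# Usage sketch for `get_inventory`, mirroring the call made in `process_all`
# below (hypothetical: assumes `segment` is a segment model instance already
# queried from an open sqlalchemy `session`):
#
#     try:
#         inventory = get_inventory(segment, session, timeout=30)
#     except ValueError as exc:
#         logger.warning("no inventory data: %s", str(exc))
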
def process_all(session, segments_model_instances, run_id,
                notify_progress_func=lambda *a, **v: None, **processing_args):
    """Processes all `segments_model_instances`, grouping them by station so
    that at most one station inventory is loaded at a time. Successfully
    processed segments are added to the db session and committed in chunks of
    `max_session_new` instances.

    :param session: an open sqlalchemy session
    :param segments_model_instances: the segment model instances to process
    :param run_id: the id of the run row the resulting processing rows belong to
    :param notify_progress_func: a callable accepting an integer (the number of
        segments just handled), called to report progress
    :param processing_args: optional keyword arguments passed to `process`
    :return: the list of `models.Processing` instances created (one per
        successfully processed segment)
    """
    # Note: we could redirect standard error to devnull here, but capturing it
    # segment-wise would be preferable (how much effort, and at what
    # performance cost?):
    # redirect_external_out(2)

    # Set after how many processed segments we want to commit. Setting it higher
    # might speed up calculations at the expense of losing up to max_session_new
    # segments if just one is wrong:
    max_session_new = 10
    # commit for safety:
    commit(session, on_exc=lambda exc: logger.error(str(exc)))

    calculated = 0
    saved = 0
    logger.info("Processing %d segments", len(segments_model_instances))
    ret = []    # all Processing instances created so far (the return value)
    batch = []  # instances created but not yet added to the session

    sta2segs = defdict(lambda: [])
    for seg in segments_model_instances:
        sta2segs[seg.channel.station_id].append(seg)

    # Process segments grouped by station, so that we load only one inventory
    # at a time and, once a station is done, its inventory can hopefully be
    # garbage collected (inventory objects are big):
    for sta_id, segments in sta2segs.iteritems():
        inventory = None
        try:
            inventory = get_inventory(segments[0], session, timeout=30)
        except SQLAlchemyError as exc:
            logger.warning("Error while saving inventory (station id=%s), "
                           "%d segments will not be processed: %s",
                           str(sta_id), len(segments), str(exc))
            session.rollback()
        except (urllib2.HTTPError, urllib2.URLError,
                httplib.HTTPException, socket.error) as exc:
            logger.warning("Error while downloading inventory (station id=%s), "
                           "%d segments will not be processed: %s URL: %s",
                           str(sta_id), len(segments), str(exc),
                           get_inventory_query(segments[0].channel.station))
        except Exception as exc:  # pylint:disable=broad-except
            logger.warning("Error while creating inventory (station id=%s), "
                           "%d segments will not be processed: %s",
                           str(sta_id), len(segments), str(exc))

        if inventory is None:
            notify_progress_func(len(segments))
            continue

        # This is the method without multiprocessing (timings: 28 and 24.7 secs
        # on 30 segments):
        for seg in segments:
            notify_progress_func(1)
            pro = models.Processing(run_id=run_id)
            try:
                pro = process(pro, seg, seg.channel, seg.channel.station,
                              seg.event, seg.datacenter, inventory,
                              **processing_args)
                pro.id = None
                pro.segment = seg
                calculated += 1
                ret.append(pro)
                batch.append(pro)
                if len(batch) >= max_session_new:
                    added = len(batch)
                    session.add_all(batch)
                    batch = []
                    if commit(session,
                              on_exc=lambda exc: logger.warning(
                                  msgs.db.dropped_seg(added, None, exc))):
                        saved += added
            except Exception as exc:  # pylint:disable=broad-except
                logger.warning(msgs.calc.dropped_seg(seg, "segments processing",
                                                     exc))

    # add and commit the last (partial) chunk, if any. Note that `add_all` was
    # missing here in a previous version, so the last chunk was never saved:
    added = len(batch)
    if added:
        session.add_all(batch)
        if commit(session, on_exc=lambda exc: logger.warning(
                msgs.db.dropped_seg(added, None, exc))):
            saved += added

    logger.info("")
    logger.info("%d segments successfully processed, %d successfully saved",
                calculated, saved)
    return ret
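
# Invocation sketch for `process_all` (hypothetical: assumes an open sqlalchemy
# `session`, the id of an existing run row in `run_id`, and a `models.Segment`
# class for querying; the progress callback just logs a running count):
#
#     segments = session.query(models.Segment).all()
#     done = []
#
#     def progress(increment):
#         done.append(increment)
#         logger.info("%d of %d segments done", sum(done), len(segments))
#
#     processed = process_all(session, segments, run_id,
#                             notify_progress_func=progress)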