def _get_session(app):
    # maybe not the nicest way to store the session, but we want to avoid flask-sqlalchemy
    # for such a simple app
    key = '__DBSESSION__'
    if not app.config.get(key, None):
        sess = get_session(app.config['DATABASE_URI'])
        app.config[key] = sess
    return app.config[key]
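
# A minimal usage sketch of _get_session (the view function and its query below are
# hypothetical, for illustration only): any Flask view can reuse the cached session like this:
def _example_segment_count(app):
    """Return the number of stored segments, reusing the session cached in app.config."""
    from stream2segment.io.db import models  # assuming the models module used elsewhere here
    sess = _get_session(app)  # created on the first call, then cached in app.config
    return sess.query(models.Segment).count()
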
def run(action, dburl, eventws, minmag, minlat, maxlat, minlon, maxlon, start, end, stimespan,
        search_radius, channels, min_sample_rate, s_inventory, traveltime_phases, wtimespan,
        processing, advanced_settings, isterminal=False):
    """
    Main run method.
    KEEP THE ARGUMENTS THE SAME AS IN config.yaml, OTHERWISE A DIFFERENT CONFIG
    WILL BE SAVED IN THE DB
    :param processing: a dict, as loaded from the config
    """
    _args_ = dict(locals())  # this must be the first statement, so that we catch all arguments
    # and no local variable (none has been declared yet). Note: dict(locals()) avoids problems
    # with variables created inside loops, when iterating over _args_ (see below)

    if action == 'gui':
        from stream2segment.gui import main as main_gui
        main_gui.run_in_browser(dburl)
        return 0

    session = get_session(dburl, scoped=True)  # FIXME: is it necessary for multiprocessing
    # in processing?

    # create logger handler
    run_row = config_logger_and_return_run_instance(session, isterminal)
    yaml_dict = load_def_cfg()
    # update with our current variables (only those present in the config yaml):
    yaml_dict.update(**{k: v for k, v in _args_.iteritems() if k in yaml_dict})
    # print local vars:
    yaml_content = StringIO()
    # use safe_dump to avoid python types. See:
    # http://stackoverflow.com/questions/1950306/pyyaml-dumping-without-tags
    yaml_content.write(yaml.safe_dump(yaml_dict, default_flow_style=False))
    config_text = yaml_content.getvalue()
    if isterminal:
        print("Arguments:")
        tab = " "
        print(tab + config_text.replace("\n", "\n%s" % tab))
    run_row.config = tounicode(config_text)
    session.commit()  # update run row. flush might also be used, but we prefer storing to db

    ret = 0
    try:
        if s_inventory is None:
            sta_del = session.query(models.Station).\
                filter(models.Station.inventory_xml != None).\
                update({models.Station.inventory_xml.key: None})
            if sta_del:
                session.commit()
                logger.info("Deleted %d station inventories (set to null)", sta_del)
        segments = []
        if 'd' in action:
            starttime = time.time()
            ret = query_main(session, run_row.id, eventws, minmag, minlat, maxlat, minlon,
                             maxlon, start, end, stimespan, search_radius['minmag'],
                             search_radius['maxmag'], search_radius['minradius'],
                             search_radius['maxradius'], channels, min_sample_rate,
                             s_inventory is True, traveltime_phases, wtimespan,
                             advanced_settings, isterminal)
            logger.info("Download completed in %s",
                        tdstr(dt.timedelta(seconds=time.time() - starttime)))
        if 'p' in action.lower() and ret == 0:
            starttime = time.time()
            if 'P' in action:
                try:
                    _ = session.query(models.Processing).delete()  # returns num rows deleted
                    session.commit()
                except SQLAlchemyError:
                    session.rollback()
                    raise Exception("Unable to delete all processing (internal db error). "
                                    "Please try to run the program again")
            segments = session.query(models.Segment).\
                filter(~models.Segment.processings.any()).all()  # @UndefinedVariable
            process_main(session, segments, run_row.id, isterminal, **processing)
            logger.info("Processing completed in %s",
                        tdstr(dt.timedelta(seconds=time.time() - starttime)))
        logger.info("")
        logger.info("%d total error(s), %d total warning(s)", run_row.errors, run_row.warnings)
    except Exception as exc:
        logger.critical(str(exc))
        raise
    finally:
        for handler in logger.handlers:
            try:
                handler.close()
            except (AttributeError, TypeError, IOError, ValueError):
                pass
    return ret
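
# A minimal invocation sketch (an assumption, not the program's actual entry point): since
# run's arguments mirror the config.yaml keys, one can load the default config and forward it,
# supplying 'action' and 'dburl' explicitly. This assumes load_def_cfg() returns exactly the
# remaining keyword arguments of run and no extra keys:
def _example_run_download(dburl="sqlite:///path/to/my/db.sqlite"):
    cfg = load_def_cfg()  # the same loader used inside run() above
    cfg.pop('dburl', None)  # hypothetical guards: avoid passing these twice, if present
    cfg.pop('action', None)
    return run('d', dburl, isterminal=True, **cfg)
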
def example():
    """
    This is an example function to get data from the local db. You can implement *any* function
    (like this one) in *any* module, as long as you first type in the module:
    ```
    from stream2segment.workwithdb import get_session
    ```
    Read the comments below before proceeding
    """
    # =============================
    # 1) Introduction
    # =============================

    # We use the SQLAlchemy library to handle database IO.
    # The library allows us to *write in python code* our tables (database schema)
    # and takes care of the rest (creating a db, writing to and reading from it, handling
    # different types of sql databases if we change in the future, handling database
    # migrations, etc.)
    # SQLAlchemy works by means of python classes, called MODELS.
    # IMPORTANT: a MODEL is a python class and represents a database table.
    # A model instance (or simply instance) is a python object and represents a table row.

    # Let's import our models (python classes reflecting the database tables).
    # You should import them at module level; we do it here for code readability:
    from stream2segment.io.db.models import Event, Station, Segment, Processing, Channel,\
        Run, DataCenter

    # Have a look at stream2segment.io.db.models to inspect the TYPE of the value
    # of each column. For instance:
    #
    # class Event(FDSNBase):
    #     """Events"""
    #
    #     __tablename__ = "events"
    #
    #     id = Column(String, primary_key=True, autoincrement=False)
    #     time = Column(DateTime, nullable=False)
    #     latitude = Column(Float, nullable=False)
    #     ...
    #
    # There you see that we have an 'id' column, the primary key (you don't care), a 'time'
    # column of type 'DateTime' (python datetime object), a 'latitude' column of type float,
    # and so on.

    # Then, you instantiate the session. The session is the tool to communicate with the
    # database:
    session = get_session()
    # the line above loads the db from config.yaml. To supply a custom sqlite path, use e.g.:
    # session = get_session("sqlite:///path/to/my/db.sqlite")

    # ================================
    # 2) Now we can do simple queries:
    # ================================

    # query *all* downloaded segments:
    segments = session.query(Segment).all()
    # The returned type is a python list of model instances (python objects).
    # How many of them? The normal python way with lists:
    seg_num = len(segments)

    # Each instance attribute represents a table column.
    # Again, if you forget which columns a table has, just look at the relative model in
    # stream2segment.io.db.models; in this case it would be the 'Segment' model.
    # So, if we are interested in the distance to the seismic event:
    first_seg = segments[0]
    distance_to_event = first_seg.event_distance_deg  # a python float

    # And so on. Quite simple. The only exception is the segments 'data' column, where
    # we store the mseed. That is 'Binary' data. We have implemented our custom function
    # to load binary data:
    from stream2segment.analysis.mseeds import loads
    mseed = loads(first_seg.data)  # obspy Stream object
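
    # A short usage sketch building on the above (the output directory is hypothetical, and
    # we assume each segment has an 'id' primary key, as the models above do): dump every
    # downloaded waveform to disk via obspy's Stream.write:
    import os
    out_dir = "/path/to/output"  # hypothetical directory, change it to a real one
    for seg in segments:
        stream = loads(seg.data)
        stream.write(os.path.join(out_dir, "%s.mseed" % seg.id), format="MSEED")
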
    # The same applies to the binary data in the processings table.
    # First we get one instance (reflecting a table row, remember) the usual way:
    processings = session.query(Processing).all()
    first_pro = processings[0]
    # And then we read the data, as we did above. Note that we implemented our API in such a
    # way that all these variables are obspy Stream objects, so the logic, methods and
    # functions of these objects are the same:
    mseed_rem_resp_savewindow = loads(first_pro.mseed_rem_resp_savewindow)  # obspy Stream object
    wood_anderson_savewindow = loads(first_pro.wood_anderson_savewindow)  # obspy Stream object
    cum_rem_resp = loads(first_pro.cum_rem_resp)  # obspy Stream object
    # You want to save an obspy Stream? As usual:
    # cum_rem_resp.write("/path/to/myfilename.mseed")
    # mseed.write("/path/to/myfilename.mseed")

    # Small exception for ffts: you read them with loads (as usual):
    fft_rem_resp_t05_t95 = loads(first_pro.fft_rem_resp_t05_t95)
    fft_rem_resp_until_atime = loads(first_pro.fft_rem_resp_until_atime)
    # but the returned objects are not obspy Stream objects, because they are on a frequency
    # scale. So you won't have all the methods of the Stream objects, but
    # accessing their data is THE SAME:
    data = fft_rem_resp_t05_t95.data  # as for obspy Trace.data, except data is a numpy COMPLEX array
    df = fft_rem_resp_t05_t95.stats.delta  # as for obspy Trace.stats.delta, except the unit is Hertz
    f0 = fft_rem_resp_t05_t95.stats.startfreq  # as for obspy Trace.stats.starttime, except the unit is Hertz
    # So, do some (very stupid) computation:
    fft_rem_resp_times_two = 2 * fft_rem_resp_t05_t95.data
    # You could in principle save an fft-like array, but it's up to you to decide how.
    # As said, this kind of object cannot be converted back to obspy Stream(s)

    # =============================
    # 3) Working with relationships
    # =============================

    # Last thing: if you want more complex queries, there are a lot of methods,
    # see e.g. here:
    # http://docs.sqlalchemy.org/en/latest/orm/tutorial.html#querying
    # but you can also use "our" relationships, implemented via some sqlalchemy facilities.
    # This might be less performant, but it's easier to work with at the beginning:
    evt = first_seg.event  # an instance of the Event class representing the seismic event originating the segment
    cha = first_seg.channel  # an instance of the Channel class representing the segment channel
    dcn = first_seg.datacenter  # an instance of the DataCenter class representing the segment datacenter
    run = first_seg.run  # an instance of the Run class representing the segment run

    # Examples:
    # give me all segments whose event's magnitude is between 3 and 4:
    segments2 = []
    for seg in segments:
        if seg.event.magnitude > 3 and seg.event.magnitude < 4:
            segments2.append(seg)
    # now work with your filtered segments (sub) list...

    # give me all segments whose event time is greater than a datetime:
    from datetime import datetime
    min_time = datetime(2016, 1, 5)
    segments2 = []
    for seg in segments:
        if seg.event.time > min_time:
            segments2.append(seg)
    # now work with your filtered segments (sub) list...

    # In the same manner, a Processing instance has an attribute 'segment':
    seg = first_pro.segment

    # give me all processed data from the run with id = 3 (knowing e.g. that that run was a
    # specific one with interesting segments):
    processings2 = []
    for pro in processings:
        if pro.segment.run.id == 3:
            processings2.append(pro)
    # now work with your filtered processings (sub) list...

    # Note that a segment instance has an attribute 'processings' WHICH IS A LIST.
    # Why? There might be more than one processing row for one segment, although this is NOT
    # currently implemented. Thus you should always get a zero- or one-element list:
    procs = first_seg.processings
    if len(procs) > 0:
        first_pro = procs[0]
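
    # ===========================================
    # 4) Bonus: the same filters as db queries
    # ===========================================

    # A minimal sketch, not required by the above: the pure-python loops of section 3 can also
    # be expressed as SQLAlchemy queries, executed by the database itself. This assumes the
    # Segment-Event foreign key lets SQLAlchemy infer the join condition (which the 'event'
    # relationship above suggests); see the tutorial link in section 3 for details:
    segments2 = session.query(Segment).join(Event).\
        filter(Event.magnitude > 3, Event.magnitude < 4).all()
    # and the datetime filter of section 3:
    segments2 = session.query(Segment).join(Event).filter(Event.time > min_time).all()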