def test_localdb_deduplication(fixture_db_session):
    """
    Check that feed entries already stored in the database are not
    reported as new by ``determine_new_entries``.

    Three feeds are built from the same canned content: the first is
    loaded into the database in full, the second must then report only
    the difference in entry counts, and the third (with one event name
    altered in the raw content) must report nothing new at all.
    """
    session = fixture_db_session

    # First pass: every entry in the feed is expected to be new.
    first_feed = asassn.AsassnFeed()
    first_feed._content = asassn_content_2018
    first_row_ids = first_feed.determine_new_entries()
    assert len(first_row_ids) == len(first_feed.event_id_data_map)

    for row_id in first_row_ids:
        packet = first_feed.generate_voevent(row_id)
        session.add(Voevent.from_etree(packet))
    session.commit()

    # Once stored, the same feed should report nothing new.
    first_feed_new_ids = first_feed.determine_new_entries()
    assert len(first_feed_new_ids) == 0

    # A second feed object should only report the surplus entries.
    second_feed = asassn.AsassnFeed()
    second_feed._content = asassn_content_2018
    second_new_ids = second_feed.determine_new_entries()
    expected_surplus = (len(second_feed.event_id_data_map)
                        - len(first_feed.event_id_data_map))
    assert len(second_new_ids) == expected_surplus

    for row_id in second_new_ids:
        packet = second_feed.generate_voevent(row_id)
        session.add(Voevent.from_etree(packet))
    session.commit()

    # Altering an event name in the raw content must not yield new entries.
    altered_content = asassn_content_2018.replace(b'ASASSN-16ad',
                                                  b'ASASSN-16adfooishbar')
    third_feed = asassn.AsassnFeed()
    third_feed._content = altered_content
    assert [] == third_feed.determine_new_entries()
def insert_voevents(self, fixture_db_session):
    """
    Setup step: insert two VOEvents - a mostly blank 'heartbeat' packet
    and a GRB packet.

    Asserts that the table is empty beforehand and holds exactly two
    rows afterwards.
    """
    session = fixture_db_session
    # Sanity check: start from an empty table.
    assert len(session.query(Voevent).all()) == 0

    heartbeat_row = Voevent.from_etree(fake.heartbeat_packets()[0])
    session.add(heartbeat_row)
    grb_row = Voevent.from_etree(swift_bat_grb_655721)
    session.add(grb_row)
    session.flush()

    # 1 with, 1 without position
    assert len(session.query(Voevent).all()) == 2
def insert_voevents(self, fixture_db_session):
    """
    Setup step: insert two VOEvents - a GRB packet and its XRT followup.

    (NB the XRT packet cites the BAT packet.)
    """
    session = fixture_db_session
    # Sanity check: start from an empty table.
    assert len(session.query(Voevent).all()) == 0

    bat_row = Voevent.from_etree(swift_bat_grb_655721)
    session.add(bat_row)
    xrt_row = Voevent.from_etree(swift_xrt_grb_655721)
    session.add(xrt_row)
    session.flush()
def direct_store_voevent(voevent):
    """
    Convert a VOEvent etree to a database row and commit it.

    A session is obtained from ``session_registry``. If conversion, add
    or commit raises an ``SQLAlchemyError``, the session is rolled back
    and the error is re-raised to the caller.
    """
    session = session_registry()
    try:
        row = Voevent.from_etree(voevent)
        session.add(row)
        session.commit()
    except sqlalchemy.exc.SQLAlchemyError:
        # Leave the session usable, then let the caller see the failure.
        session.rollback()
        raise
def test_insert_tdb_timestamp_voevent(self, fixture_db_session):
    """
    Insert the Gaia packet and check a timestamped position is parsed.

    Verifies that the packet can be stored as a single row, and that
    ``Coord.from_etree`` yields exactly one position whose ``time`` is
    populated.
    """
    session = fixture_db_session
    # Sanity check: start from an empty table.
    assert len(session.query(Voevent).all()) == 0

    session.add(Voevent.from_etree(gaia_16bsg))
    session.flush()
    assert len(session.query(Voevent).all()) == 1

    parsed_coords = Coord.from_etree(gaia_16bsg)
    assert len(parsed_coords) == 1
    gaia_coord = parsed_coords[0]
    assert gaia_coord.time is not None
def __init__(self, fixture_db_session):
    """
    Build a small set of inter-citing packets and insert all but the
    last one into the database session.

    The packet list consists of a Swift BAT GRB packet, a batch of
    'test'-role heartbeat packets, and a second batch of 'utility'-role
    heartbeat packets starting 24 hours after the default start time.
    Citations are attached to the second batch via ``add_reference``.
    Various summaries of the inserted packets (ivorns, streams, roles,
    serialised dumps) are stored as attributes for use in tests.
    """
    session = fixture_db_session

    all_packets = [swift_bat_grb_655721]
    all_packets.extend(
        fake.heartbeat_packets(role=vp.definitions.roles.test))
    citing_packets = fake.heartbeat_packets(
        start=fake.default_start_dt + timedelta(hours=24),
        role=vp.definitions.roles.utility)

    # Bookkeeping attributes, initialised before any add_reference call.
    # Count times ivorn referenced by other packets in db:
    self.cite_counts = defaultdict(int)
    # Packets containing at least one cite entry:
    self.followup_packets = []
    # Total number of citations
    # (one packet may have multiple cite entries):
    self.n_citations = 0

    grb_ivorn = all_packets[0].attrib['ivorn']
    first_heartbeat_ivorn = all_packets[1].attrib['ivorn']

    # One reference in citing packet 0.
    self.add_reference(citing_packets[0], grb_ivorn)
    # Two references in citing packet 1.
    self.add_reference(citing_packets[1], grb_ivorn)
    self.add_reference(citing_packets[1], first_heartbeat_ivorn)
    # Now cite citing packet 0, making it both cites / cited_by.
    citing0_ivorn = citing_packets[0].attrib['ivorn']
    self.add_reference(citing_packets[2], citing0_ivorn)
    # Add a citation to an external packet.
    self.add_reference(citing_packets[3],
                       swift_xrt_grb_655721.attrib['ivorn'])

    all_packets.extend(citing_packets)
    self.packet_dict = {
        packet.attrib['ivorn']: packet for packet in all_packets
    }

    # Insert all but the last packet; the one held back gives us a
    # useful counter-example.
    self.insert_packets = all_packets[:-1]
    self.insert_packets_dumps = [
        vp.dumps(packet) for packet in self.insert_packets
    ]
    # Stream = ivorn up to the '#', minus its first six characters.
    self.streams = [
        packet.attrib['ivorn'].split('#')[0][6:]
        for packet in self.insert_packets
    ]
    self.stream_set = list(set(self.streams))
    self.roles = [packet.attrib['role'] for packet in self.insert_packets]
    self.role_set = list(set(self.roles))
    self.remaining_packet = all_packets[-1]

    session.add_all(
        (Voevent.from_etree(packet) for packet in self.insert_packets))
    self.n_inserts = len(self.insert_packets)
    self.inserted_ivorns = [
        packet.attrib['ivorn'] for packet in self.insert_packets
    ]
    self.absent_ivorn = self.remaining_packet.attrib['ivorn']
def __init__(self, fixture_db_session):
    """
    Assemble a set of packets with citations between them, and add all
    except the final packet to the supplied database session.

    Attributes recording the inserted packets, their ivorns, streams,
    roles and XML dumps are stored on the instance, along with the
    packet that was deliberately left out (``remaining_packet`` /
    ``absent_ivorn``).
    """
    db = fixture_db_session

    base_pkts = [swift_bat_grb_655721]
    base_pkts.extend(fake.heartbeat_packets(role=vp.definitions.roles.test))
    later_start = fake.default_start_dt + timedelta(hours=24)
    followups = fake.heartbeat_packets(
        start=later_start, role=vp.definitions.roles.utility)

    # Number of times each ivorn is referenced by other packets in the db.
    self.cite_counts = defaultdict(int)
    # Packets that contain at least one cite entry.
    self.followup_packets = []
    # Total citation count (a single packet may carry several entries).
    self.n_citations = 0

    cited0 = base_pkts[0].attrib['ivorn']
    cited1 = base_pkts[1].attrib['ivorn']
    # followups[0] gets one reference; followups[1] gets two.
    initial_citations = (
        (followups[0], cited0),
        (followups[1], cited0),
        (followups[1], cited1),
    )
    for citing_pkt, cited_ivorn in initial_citations:
        self.add_reference(citing_pkt, cited_ivorn)

    # followups[0] is now cited too, so it both cites and is cited_by.
    cited2 = followups[0].attrib['ivorn']
    self.add_reference(followups[2], cited2)
    # Finally, a citation pointing at an external packet.
    external_ivorn = swift_xrt_grb_655721.attrib['ivorn']
    self.add_reference(followups[3], external_ivorn)

    base_pkts.extend(followups)
    self.packet_dict = {p.attrib['ivorn']: p for p in base_pkts}

    # Everything but the last packet goes into the database; the one
    # left out serves as a useful counter-example.
    self.remaining_packet = base_pkts[-1]
    self.insert_packets = base_pkts[:-1]
    self.insert_packets_dumps = [vp.dumps(p) for p in self.insert_packets]

    self.streams = [
        p.attrib['ivorn'].split('#')[0][6:] for p in self.insert_packets
    ]
    self.stream_set = list(set(self.streams))
    self.roles = [p.attrib['role'] for p in self.insert_packets]
    self.role_set = list(set(self.roles))

    db.add_all((Voevent.from_etree(p) for p in self.insert_packets))
    self.n_inserts = len(self.insert_packets)
    self.inserted_ivorns = [p.attrib['ivorn'] for p in self.insert_packets]
    self.absent_ivorn = self.remaining_packet.attrib['ivorn']
def load_from_tarfile(session, tarfile_path, check_for_duplicates,
                      pkts_per_commit=1000):
    """
    Iterate through xml files in a tarball and attempt to load into database.

    .. warning::
        Very slow with duplicate checking enabled.

    Args:
        session: Database session used to add and commit rows.
        tarfile_path: Path of the tarball to read packets from.
        check_for_duplicates: If true, each packet's ivorn is looked up
            via ``ivorn_present`` first, and already-present packets are
            skipped.
        pkts_per_commit: The session is committed each time the running
            count of loaded packets reaches a multiple of this value.

    Returns:
        tuple: (n_parsed, n_loaded) - Total number of packets parsed from
        tarball, and number successfully loaded.
    """
    tf_stream = tarfile_xml_generator(tarfile_path)
    logger.info("Loading: " + tarfile_path)

    n_parsed = 0
    n_loaded = 0
    for tarinf in tf_stream:
        # Stage 1: parse the XML. A packet that fails to parse is logged
        # and skipped so that one bad file does not abort the whole load.
        # NB 'except Exception' (rather than a bare 'except') so that
        # KeyboardInterrupt / SystemExit still stop a long-running load.
        try:
            v = vp.loads(tarinf.xml, check_version=False)
            if v.attrib['version'] != '2.0':
                logger.debug(
                    'Packet: {} is not VO-schema version 2.0.'.format(
                        tarinf.name))
            n_parsed += 1
        except Exception:
            logger.exception('Error loading file {}, skipping'.format(
                tarinf.name))
            continue

        # Stage 2: convert to a database row and add it to the session.
        try:
            new_row = Voevent.from_etree(v)
            if check_for_duplicates and ivorn_present(session,
                                                      new_row.ivorn):
                logger.debug(
                    "Ignoring duplicate ivorn: {} in file {}".format(
                        new_row.ivorn, tarinf.name))
                continue
            session.add(new_row)
            n_loaded += 1
        except Exception:
            logger.exception(
                'Error converting file {} to database row, skipping'.
                format(tarinf.name))
            continue

        # Commit in batches; only reached after a successful add.
        if n_loaded % pkts_per_commit == 0:
            session.commit()

    # Final commit picks up any rows left over from a partial batch.
    session.commit()
    logger.info("Successfully parsed {} packets, of which loaded {}.".format(n_parsed, n_loaded))
    return n_parsed, n_loaded
def safe_insert_voevent(session, etree):
    """
    Insert a VOEvent, or skip with a warning if it's a duplicate.

    NB XML contents are checked to confirm duplication - if there's a
    mismatch, we raise a ValueError.

    Args:
        session: Database session to query and add the row to.
        etree: Parsed VOEvent packet, converted via ``Voevent.from_etree``.

    Raises:
        ValueError: If a row with the same IVORN is already present but
            its stored XML differs from the new packet's XML.
    """
    new_row = Voevent.from_etree(etree)
    if not ivorn_present(session, new_row.ivorn):
        session.add(new_row)
    else:
        # Same IVORN already stored: only acceptable if the XML matches.
        old_xml = session.query(Voevent.xml).filter(
            Voevent.ivorn == new_row.ivorn).scalar()
        if old_xml != new_row.xml:
            raise ValueError('Tried to load a VOEvent with duplicate IVORN,'
                             'but XML contents differ - not clear what to do.')
        else:
            # 'warning' rather than the deprecated 'warn' alias.
            logger.warning('Skipping insert for packet with duplicate IVORN, '
                           'XML matches OK.')
def safe_insert_voevent(session, etree):
    """
    Add a VOEvent to the session unless its IVORN is already present.

    When the IVORN is already present, the stored XML is compared with
    the new packet's XML: a match is logged as a warning and the insert
    is skipped; a mismatch raises a ValueError.
    """
    candidate = Voevent.from_etree(etree)

    if not ivorn_present(session, candidate.ivorn):
        session.add(candidate)
        return

    # Duplicate IVORN: fetch the stored XML to confirm it really matches.
    xml_query = session.query(Voevent.xml)
    stored_xml = xml_query.filter(Voevent.ivorn == candidate.ivorn).scalar()

    if stored_xml != candidate.xml:
        raise ValueError('Tried to load a VOEvent with duplicate IVORN,'
                         'but XML contents differ - not clear what to do.')

    logger.warning('Skipping insert for packet with duplicate IVORN, '
                   'XML matches OK.')
def assign_test_client_and_initdb(self, flask_test_client, fixture_db_session):
    """
    Store the test client and fill the database with positioned packets.

    Seventeen heartbeat packets are generated, each given a position at
    RA 15 degrees and a declination stepped evenly up to +90 degrees.
    A map of ivorn -> declination is kept on ``self.ivorn_dec_map``, and
    every packet is added to the database session.
    """
    self.c = flask_test_client  # Purely for brevity

    n_packets = 17
    packets = heartbeat_packets(n_packets=n_packets)
    dec_step = 180.0 / n_packets
    for index, packet in enumerate(packets, start=1):
        declination = dec_step * index - 90
        position = vp.Position2D(
            ra=15,
            dec=declination,
            err=0.1,
            units=vp.definitions.units.degrees,
            system=vp.definitions.sky_coord_system.utc_icrs_geo)
        # The observation time is taken from the packet's own Who.Date.
        observed_at = iso8601.parse_date(packet.Who.Date.text)
        vp.add_where_when(
            packet,
            coords=position,
            obs_time=observed_at,
            observatory_location=vp.definitions.observatory_location.geosurface
        )

    self.packets = packets
    self.ivorn_dec_map = {}
    for packet in self.packets:
        # Read the position back, so the map holds the stored value.
        event_position = vp.get_event_position(packet)
        self.ivorn_dec_map[packet.attrib['ivorn']] = event_position.dec
        fixture_db_session.add(Voevent.from_etree(packet))
def assign_test_client_and_initdb(self, flask_test_client, fixture_db_session):
    """
    Keep a handle on the test client, then create and insert a set of
    heartbeat packets spread out in declination.

    Each of the 17 packets is assigned RA 15 degrees with a 0.1 degree
    error, and a declination of ``180.0 / 17 * counter - 90`` (counter
    running from 1). The ivorn -> declination mapping is recorded on
    ``self.ivorn_dec_map`` as the packets are added to the session.
    """
    self.c = flask_test_client  # Purely for brevity
    db_session = fixture_db_session

    n_packets = 17
    packets = heartbeat_packets(n_packets=n_packets)

    units = vp.definitions.units.degrees
    for counter, pkt in enumerate(packets, start=1):
        coords = vp.Position2D(
            ra=15,
            dec=180.0 / n_packets * counter - 90,
            err=0.1,
            units=units,
            system=vp.definitions.sky_coord_system.utc_icrs_geo)
        vp.add_where_when(
            pkt,
            coords=coords,
            obs_time=iso8601.parse_date(pkt.Who.Date.text),
            observatory_location=vp.definitions.observatory_location.geosurface
        )

    self.packets = packets
    self.ivorn_dec_map = {}
    for pkt in self.packets:
        ivorn = pkt.attrib['ivorn']
        self.ivorn_dec_map[ivorn] = vp.get_event_position(pkt).dec
        db_session.add(Voevent.from_etree(pkt))
def test_unique_ivorn_constraint(self, fixture_db_session):
    """
    Adding and flushing the Swift BAT GRB position packet must raise
    an IntegrityError.

    NOTE(review): this presumably relies on a setup step having already
    inserted the same packet - confirm against the enclosing class.
    """
    session = fixture_db_session
    with pytest.raises(IntegrityError):
        # Should throw, breaks unique IVORN constraint:
        duplicate_row = Voevent.from_etree(swift_bat_grb_pos_v2_etree)
        session.add(duplicate_row)
        session.flush()
def insert_single_voevent(self, fixture_db_session):
    """Setup step: add one VOEvent (the Swift BAT GRB position packet)."""
    session = fixture_db_session
    # Sanity check: start from an empty table.
    assert len(session.query(Voevent).all()) == 0
    row = Voevent.from_etree(swift_bat_grb_pos_v2_etree)
    session.add(row)
def test_bad_coord_rejection():
    """
    The row built from the ``konus_lc`` packet must carry no coords.
    """
    konus_row = Voevent.from_etree(konus_lc)
    assert konus_row.coords == []