Exemple #1
0
def test_tarball_round_trip(named_temporary_file, fixture_db_session):
    """Check that VOEvents survive a database -> tarball -> parse round trip.

    Heartbeat packets are added to the session and queried back; the queried
    rows are written to a tarball, re-read and re-parsed. The reloaded
    packets must match the originals both by ivorn and by serialized content.
    """
    packets = fake.heartbeat_packets()
    session = fixture_db_session
    for packet in packets:
        session.add(models.Voevent.from_etree(packet))
    session.flush()

    db_rows = session.query(models.Voevent.ivorn, models.Voevent.xml).all()
    assert len(db_rows) == len(packets)

    # Model instances built straight from the etrees (never stored), used as
    # the reference for what the queried rows should contain.
    fresh_rows = [models.Voevent.from_etree(packet) for packet in packets]
    first_db_row = db_rows[0]
    first_fresh_row = fresh_rows[0]
    assert first_db_row.ivorn == first_fresh_row.ivorn
    assert first_db_row.xml == first_fresh_row.xml

    # The xml payload must be a bytestring both before and after the query.
    assert type(first_db_row.xml) == type(first_fresh_row.xml)
    assert type(first_fresh_row.xml) == six.binary_type

    # Write the rows that came back from the database (not the fresh model
    # instances), so the tarball is produced from genuinely round-tripped data.
    tarball_path = named_temporary_file.name
    filestore.write_tarball(db_rows, tarball_path)

    reloaded = []
    for entry in filestore.tarfile_xml_generator(tarball_path):
        reloaded.append(vp.loads(entry.xml))

    original_ivorns = [packet.attrib['ivorn'] for packet in packets]
    reloaded_ivorns = [packet.attrib['ivorn'] for packet in reloaded]
    assert original_ivorns == reloaded_ivorns

    original_dumps = [vp.dumps(packet) for packet in packets]
    reloaded_dumps = [vp.dumps(packet) for packet in reloaded]
    assert original_dumps == reloaded_dumps
Exemple #2
0
def load_from_tarfile(session, tarfile_path, check_for_duplicates,
                      pkts_per_commit=1000):
    """
    Iterate through xml files in a tarball and attempt to load into database.

    Packets that fail to parse, or that cannot be converted to a database
    row, are logged (with traceback) and skipped; they do not abort the load.

    .. warning::
        Very slow with duplicate checking enabled.

    Args:
        session: Database session. Rows are added to it, and it is committed
            every ``pkts_per_commit`` loaded packets and once more at the end.
        tarfile_path: Path of the tarball to read.
        check_for_duplicates (bool): If true, skip any packet whose ivorn
            ``ivorn_present`` reports as already present.
        pkts_per_commit (int): Number of loaded packets between intermediate
            commits. Must be non-zero (it is used as a modulus).

    Returns:
        tuple: (n_parsed, n_loaded) - Total number of packets parsed from
            tarball, and number successfully loaded.

    """
    tf_stream = tarfile_xml_generator(tarfile_path)
    logger.info("Loading: %s", tarfile_path)
    n_parsed = 0
    n_loaded = 0
    for tarinf in tf_stream:
        # Stage 1: parse the raw XML. Catch Exception (not a bare except) so
        # that KeyboardInterrupt / SystemExit still stop a long-running load.
        try:
            v = vp.loads(tarinf.xml, check_version=False)
            if v.attrib['version'] != '2.0':
                logger.debug('Packet: %s is not VO-schema version 2.0.',
                             tarinf.name)
            n_parsed += 1
        except Exception:
            logger.exception('Error loading file %s, skipping', tarinf.name)
            continue

        # Stage 2: convert to a database row and add it to the session.
        try:
            new_row = Voevent.from_etree(v)
            if check_for_duplicates and ivorn_present(session, new_row.ivorn):
                logger.debug("Ignoring duplicate ivorn: %s in file %s",
                             new_row.ivorn, tarinf.name)
                continue
            session.add(new_row)
            n_loaded += 1
        except Exception:
            logger.exception(
                'Error converting file %s to database row, skipping',
                tarinf.name)
            continue

        # Only reached directly after a successful add, so n_loaded >= 1 here
        # and a commit happens once per `pkts_per_commit` loaded packets.
        if n_loaded % pkts_per_commit == 0:
            session.commit()
    # Final commit picks up any rows added since the last batch commit.
    session.commit()
    logger.info("Successfully parsed %s packets, of which loaded %s.",
                n_parsed, n_loaded)
    return n_parsed, n_loaded
def test_tarball_round_trip(named_temporary_file, fixture_db_session):
    """Check that VOEvents survive a database -> tarball -> parse round trip.

    Heartbeat packets are added to the session and queried back; the queried
    rows are written to a tarball, re-read and re-parsed. The reloaded
    packets must match the originals both by ivorn and by serialized content.
    """
    packets = fake.heartbeat_packets()
    session = fixture_db_session
    for packet in packets:
        session.add(models.Voevent.from_etree(packet))
    session.flush()

    db_rows = session.query(models.Voevent.ivorn, models.Voevent.xml).all()
    assert len(db_rows) == len(packets)

    # Model instances built straight from the etrees (never stored), used as
    # the reference for what the queried rows should contain.
    fresh_rows = [models.Voevent.from_etree(packet) for packet in packets]
    first_db_row = db_rows[0]
    first_fresh_row = fresh_rows[0]
    assert first_db_row.ivorn == first_fresh_row.ivorn
    assert first_db_row.xml == first_fresh_row.xml

    # Key point: a freshly built model keeps whatever string type it was
    # given (bytestring stays bytestring, unicode stays unicode), whereas a
    # value queried back from the database has had the column type applied,
    # so the bytestring comes back as unicode. The contents compare equal
    # (above) but the types differ. `unicode` is the Python 2 builtin.
    assert type(first_db_row.xml) != type(first_fresh_row.xml)
    assert type(first_fresh_row.xml) == str
    assert type(first_db_row.xml) == unicode

    # Because of that type change, the tarball must be written from the rows
    # that actually came back from the database, not the fresh instances.
    tarball_path = named_temporary_file.name
    filestore.write_tarball(db_rows, tarball_path)

    reloaded = []
    for entry in filestore.tarfile_xml_generator(tarball_path):
        reloaded.append(vp.loads(entry.xml))

    original_ivorns = [packet.attrib['ivorn'] for packet in packets]
    reloaded_ivorns = [packet.attrib['ivorn'] for packet in reloaded]
    assert original_ivorns == reloaded_ivorns

    original_dumps = [vp.dumps(packet) for packet in packets]
    reloaded_dumps = [vp.dumps(packet) for packet in reloaded]
    assert original_dumps == reloaded_dumps