Exemple #1
0
def test_string_latin1_encoding_works(fpath):
    """Check that CHANN1's units compare equal to a str under latin1.

    The encodings that were active on entry are put back afterwards,
    whether or not the assertion holds.
    """
    saved = dlisio.get_encodings()
    try:
        dlisio.set_encodings(['latin1'])
        with dlisio.load(fpath) as (f, *_):
            units = f.object('CHANNEL', 'CHANN1', 10, 0).units
            assert units == "custom unit°"
    finally:
        dlisio.set_encodings(saved)
Exemple #2
0
def test_string_encoding_warns(fpath):
    """Check the fallback when no encodings are configured.

    A UnicodeWarning is expected while the file is loaded and the channel
    is read, and the units are compared against the raw bytes.
    """
    saved = dlisio.get_encodings()
    try:
        dlisio.set_encodings([])
        # The warning check wraps the whole load, exactly like nested withs.
        with pytest.warns(UnicodeWarning), dlisio.load(fpath) as (f, *_):
            units = f.object('CHANNEL', 'CHANN1', 10, 0).units
            assert units == b'custom unit\xb0'
    finally:
        dlisio.set_encodings(saved)
Exemple #3
0
def test_broken_utf8_object_name(tmpdir, merge_files_oneLR):
    """Load a file whose object name fails with the utf-8 codec.

    Some actual files have such names. Loading under the encodings active
    on entry must emit a UnicodeWarning; with koi8_r configured the object
    can be looked up by its decoded name.
    """
    parts = [
        'data/chap3/start.dlis.part',
        'data/chap3/template/default.dlis.part',
        'data/chap3/object/broken-utf8-object.dlis.part',
    ]
    path = os.path.join(str(tmpdir), 'broken_utf8_object_name.dlis')
    merge_files_oneLR(path, parts)

    with pytest.warns(UnicodeWarning), dlisio.load(path):
        pass

    saved = dlisio.get_encodings()
    try:
        dlisio.set_encodings(['koi8_r'])
        with dlisio.load(path) as (f, *_):
            # Only the lookup itself is exercised; the result is unused.
            f.object('VERY_MUCH_TESTY_SET', 'КАДР', 12, 4)
    finally:
        dlisio.set_encodings(saved)
Exemple #4
0
def test_broken_utf8_label(tmpdir, merge_files_oneLR):
    """Load a file built from the broken-utf8-label template part.

    Loading under the encodings active on entry must emit a
    UnicodeWarning; with koi8_r configured the attribute is reachable in
    ``attic`` under its decoded label.
    """
    parts = [
        'data/chap3/start.dlis.part',
        'data/chap3/template/broken-utf8-label.dlis.part',
        'data/chap3/object/object.dlis.part',
    ]
    path = os.path.join(str(tmpdir), 'broken_utf8_label.dlis')
    merge_files_oneLR(path, parts)

    with pytest.warns(UnicodeWarning), dlisio.load(path):
        pass

    saved = dlisio.get_encodings()
    try:
        dlisio.set_encodings(['koi8_r'])
        with dlisio.load(path) as (f, *_):
            target = f.object('VERY_MUCH_TESTY_SET', 'OBJECT', 1, 1)
            assert target.attic['ДОХЛЫЙ-ПАРАМЕТР'] == ['Have a nice day!']
    finally:
        dlisio.set_encodings(saved)
Exemple #5
0
def test_broken_utf8_set(tmpdir, merge_files_oneLR):
    """Load a file built from the broken-utf8-set part.

    Loading and calling ``f.load()`` under the encodings active on entry
    must emit a UnicodeWarning; with koi8_r configured an object can be
    looked up through the decoded set type.
    """
    parts = [
        'data/chap3/sul.dlis.part',
        'data/chap3/set/broken-utf8-set.dlis.part',
        'data/chap3/template/default.dlis.part',
        'data/chap3/object/object.dlis.part',
    ]
    path = os.path.join(str(tmpdir), 'broken_utf8_set.dlis')
    merge_files_oneLR(path, parts)

    with pytest.warns(UnicodeWarning), dlisio.load(path) as (f, *_):
        f.load()

    saved = dlisio.get_encodings()
    try:
        dlisio.set_encodings(['koi8_r'])
        with dlisio.load(path) as (f, *_):
            f.object('СЕТ_КИРИЛЛИЦЕЙ', 'OBJECT', 1, 1)
            # NOTE: a check of the set name against 'МЕНЯ.ЗОВУТ.СЕТ' is
            # currently disabled.
    finally:
        dlisio.set_encodings(saved)
Exemple #6
0
def test_non_utf8_frame():
    """Read the curves of a frame whose name is not utf-8.

    findfdata (and hence load) should _not_ fail because of a non-utf8
    Frame name. Furthermore, it should be possible to extract the curves of
    said frame, provided that the right encoding is set.
    """
    fpath = 'data/chap4-7/encoded-obname.dlis'

    prev_encodings = dlisio.get_encodings()
    try:
        # Changing the encodings and loading both happen inside the try so
        # that a failing load can never leave koi8_r set for later tests.
        dlisio.set_encodings(['koi8_r'])
        f, = dlisio.load(fpath)
        try:
            fr = f.object('FRAME', 'ENCODED-[Б╣дTУБ1')
            ch = fr.channels[0]
            curves = fr.curves()

            assert curves[fr.index] == [1]
            assert curves[ch.fingerprint] == [32915]
        finally:
            f.close()
    finally:
        dlisio.set_encodings(prev_encodings)
Exemple #7
0
def test_broken_utf8_obname_value(tmpdir, merge_files_oneLR):
    """Load a file built from the broken-utf8-obname-value part.

    Loading under the encodings active on entry must emit a
    UnicodeWarning; with koi8_r configured the attribute value in
    ``attic`` compares equal to the tuple ``(2, 2, 'КОТ')``.
    """
    parts = [
        'data/chap3/start.dlis.part',
        'data/chap3/template/default.dlis.part',
        'data/chap3/object/object.dlis.part',
        'data/chap3/objattr/broken-utf8-obname-value.dlis.part',
    ]
    path = os.path.join(str(tmpdir), 'broken_utf8_obname_value.dlis')
    merge_files_oneLR(path, parts)

    with pytest.warns(UnicodeWarning), dlisio.load(path):
        pass

    saved = dlisio.get_encodings()
    try:
        dlisio.set_encodings(['koi8_r'])
        with dlisio.load(path) as (f, *_):
            expected = [(2, 2, 'КОТ')]
            target = f.object('VERY_MUCH_TESTY_SET', 'OBJECT', 1, 1)
            assert target.attic['DEFAULT_ATTRIBUTE'] == expected
    finally:
        dlisio.set_encodings(saved)
Exemple #8
0
def test_broken_utf8_object_name(tmpdir, merge_files_oneLR):
    """Match an object whose name fails with the utf-8 codec.

    Some actual files have such names. With no encodings configured,
    matching must emit a UnicodeWarning; after switching to koi8_r the
    single matched object exposes the decoded name.
    """
    path = os.path.join(str(tmpdir), 'broken_utf8_object_name.dlis')
    content = [
        'data/chap3/start.dlis.part',
        'data/chap3/template/default.dlis.part',
        'data/chap3/object/broken-utf8-object.dlis.part',
    ]
    merge_files_oneLR(path, content)

    prev_encodings = dlisio.get_encodings()
    dlisio.set_encodings([])

    try:
        f, = dlisio.load(path)
        # The file gets its own try/finally: if load itself fails there is
        # nothing to close, and the original error is reported untouched.
        try:
            with pytest.warns(UnicodeWarning):
                _ = f.match('.*', 'VERY_MUCH_TESTY_SET')

            dlisio.set_encodings(['koi8_r'])

            # Unpacking into a one-element target asserts a single match.
            [obj] = list(f.match('.*', 'VERY_MUCH_TESTY_SET'))
            assert obj.name == 'КАДР'
        finally:
            f.close()
    finally:
        dlisio.set_encodings(prev_encodings)
Exemple #9
0
def test_broken_utf8_obname_value(tmpdir, merge_files_oneLR):
    """Read an obname attribute value whose id is not utf-8.

    With no encodings configured, reading ``obname.id`` must emit a
    UnicodeWarning; after switching to koi8_r the same obname yields the
    decoded id.
    """
    path = os.path.join(str(tmpdir), 'broken_utf8_obname_value.dlis')
    content = [
        'data/chap3/start.dlis.part',
        'data/chap3/template/default.dlis.part',
        'data/chap3/object/object.dlis.part',
        'data/chap3/objattr/broken-utf8-obname-value.dlis.part',
    ]
    merge_files_oneLR(path, content)

    prev_encodings = dlisio.get_encodings()
    dlisio.set_encodings([])

    try:
        f, = dlisio.load(path)
        # The file gets its own try/finally: if load itself fails there is
        # nothing to close, and the original error is reported untouched.
        try:
            obj = f.object('VERY_MUCH_TESTY_SET', 'OBJECT', 1, 1)
            obname = obj['DEFAULT_ATTRIBUTE'][0]

            with pytest.warns(UnicodeWarning):
                _ = obname.id

            dlisio.set_encodings(['koi8_r'])
            assert obname.id == 'КОТ'
        finally:
            f.close()
    finally:
        dlisio.set_encodings(prev_encodings)
Exemple #10
0
def test_broken_utf8_value(tmpdir, merge_files_oneLR):
    """Read attribute values and units that are not utf-8.

    With no encodings configured, loading the file and reading the
    attribute must emit a UnicodeWarning. With koi8_r configured the value
    (via item access and ``stash``) and the units (via ``attic``) compare
    equal to the decoded strings.
    """
    path = os.path.join(str(tmpdir), 'broken_utf8_value.dlis')
    content = [
        'data/chap3/start.dlis.part',
        'data/chap3/template/default.dlis.part',
        'data/chap3/object/object.dlis.part',
        'data/chap3/objattr/broken-utf8-value.dlis.part',
    ]
    merge_files_oneLR(path, content)

    prev_encodings = dlisio.get_encodings()
    try:
        # Every change to the global encodings lives inside the try, so a
        # failure in the warning check cannot leak the empty list.
        dlisio.set_encodings([])
        with pytest.warns(UnicodeWarning):
            with dlisio.load(path) as (f, *_):
                obj = f.object('VERY_MUCH_TESTY_SET', 'OBJECT')
                _ = obj['DEFAULT_ATTRIBUTE']

        dlisio.set_encodings(['koi8_r'])
        with dlisio.load(path) as (f, *_):
            obj = f.object('VERY_MUCH_TESTY_SET', 'OBJECT', 1, 1)
            assert obj['DEFAULT_ATTRIBUTE'] == ['ВАЖНЫЙ-ПАРАМЕТР']
            assert obj.stash['DEFAULT_ATTRIBUTE'] == ['ВАЖНЫЙ-ПАРАМЕТР']
            assert obj.attic['DEFAULT_ATTRIBUTE'].units == "2 локтя на долю"
    finally:
        dlisio.set_encodings(prev_encodings)
Exemple #11
0
def test_broken_utf8_object_fp(tmpdir, merge_files_oneLR):
    """Build ``object.fingerprint`` regardless of the configured encoding.

    With koi8_r the fingerprint is a str containing the decoded name; with
    no encodings a UnicodeWarning is emitted and the fingerprint is a bytes
    object holding the raw name.
    """
    path = os.path.join(str(tmpdir), 'broken_utf8_object_name.dlis')
    content = [
        'data/chap3/start.dlis.part',
        'data/chap3/template/default.dlis.part',
        'data/chap3/object/broken-utf8-object.dlis.part',
    ]
    merge_files_oneLR(path, content)

    prev_encodings = dlisio.get_encodings()
    dlisio.set_encodings(['koi8_r'])

    try:
        f, = dlisio.load(path)
        # The file gets its own try/finally: if load itself fails there is
        # nothing to close, and the original error is reported untouched.
        try:
            # With the correct encoding we should get a nice fingerprint
            obj = f.object('VERY_MUCH_TESTY_SET', 'КАДР')
            assert obj.fingerprint == 'T.VERY_MUCH_TESTY_SET-I.КАДР-O.12-C.0'

            dlisio.set_encodings([])

            # Without a matching encoding we should get a warning and
            # a bytes object
            expected = b'T.VERY_MUCH_TESTY_SET-I.\xeb\xe1\xe4\xf2-O.12-C.0'

            with pytest.warns(UnicodeWarning):
                fp = obj.fingerprint

            assert fp == expected
        finally:
            f.close()
    finally:
        dlisio.set_encodings(prev_encodings)
Exemple #12
0
def test_broken_utf8_set(tmpdir, merge_files_oneLR):
    """Read ``object_pool.types`` when a set type is not utf-8.

    With no encodings configured, reading the types must emit a
    UnicodeWarning; after switching to koi8_r the decoded set type is
    among them.
    """
    path = os.path.join(str(tmpdir), 'broken_utf8_set.dlis')
    content = [
        'data/chap3/sul.dlis.part',
        'data/chap3/set/broken-utf8-set.dlis.part',
        'data/chap3/template/default.dlis.part',
        'data/chap3/object/object.dlis.part',
    ]
    merge_files_oneLR(path, content)

    prev_encodings = dlisio.get_encodings()

    # at the moment load uses object_pool.types, so set encoding after load.
    # Loading before the try also means a failed load has nothing to clean
    # up: no file is open and the encodings have not been touched yet.
    f, = dlisio.load(path)
    try:
        dlisio.set_encodings([])

        with pytest.warns(UnicodeWarning):
            _ = f.object_pool.types

        dlisio.set_encodings(['koi8_r'])
        assert 'СЕТ_КИРИЛЛИЦЕЙ' in f.object_pool.types
        # NOTE: a check of the set name against 'МЕНЯ.ЗОВУТ.СЕТ' is
        # currently disabled.
    finally:
        dlisio.set_encodings(prev_encodings)
        f.close()
Exemple #13
0
def test_access_to_object_with_non_utf8_name(tmpdir, merge_files_oneLR):
    """Query an object with a non-utf8 name through ``f.object``.

    The object can be queried regardless of what encoding is set: by its
    raw bytes when no encoding is configured, and by its decoded str once
    the matching encoding (koi8_r) is configured.
    """
    path = os.path.join(str(tmpdir), 'broken_utf8_object_name.dlis')
    content = [
        'data/chap3/start.dlis.part',
        'data/chap3/template/default.dlis.part',
        'data/chap3/object/broken-utf8-object.dlis.part',
    ]
    merge_files_oneLR(path, content)

    prev_encodings = dlisio.get_encodings()
    dlisio.set_encodings([])

    try:
        f, = dlisio.load(path)
        # The file gets its own try/finally: if load itself fails there is
        # nothing to close, and the original error is reported untouched.
        try:
            # Can't expect to find an object with an encoded string
            # if the encoding is not given to dlisio
            with pytest.raises(ValueError):
                with pytest.warns(UnicodeWarning):
                    _ = f.object('VERY_MUCH_TESTY_SET', 'КАДР')

            # However it can be found by matching the bytes
            with pytest.warns(UnicodeWarning):
                obj = f.object('VERY_MUCH_TESTY_SET', b'\xeb\xe1\xe4\xf2')

            assert obj.name == b'\xeb\xe1\xe4\xf2'

            # When the encoding of the string parameter matches the
            # encoding dlisio uses, we should expect to find the object
            dlisio.set_encodings(['koi8_r'])
            obj = f.object('VERY_MUCH_TESTY_SET', 'КАДР')
            assert obj.name == 'КАДР'
        finally:
            f.close()
    finally:
        dlisio.set_encodings(prev_encodings)
Exemple #14
0
def test_broken_utf8_label(tmpdir, merge_files_oneLR):
    """Read ``attic`` keys when an attribute label is not utf-8.

    With no encodings configured, listing the keys must emit a
    UnicodeWarning; after switching to koi8_r the decoded label is among
    the keys.
    """
    path = os.path.join(str(tmpdir), 'broken_utf8_label.dlis')
    content = [
        'data/chap3/start.dlis.part',
        'data/chap3/template/broken-utf8-label.dlis.part',
        'data/chap3/object/object.dlis.part',
    ]
    merge_files_oneLR(path, content)

    prev_encodings = dlisio.get_encodings()
    dlisio.set_encodings([])

    try:
        f, = dlisio.load(path)
        # The file gets its own try/finally: if load itself fails there is
        # nothing to close, and the original error is reported untouched.
        try:
            obj = f.object('VERY_MUCH_TESTY_SET', 'OBJECT', 1, 1)

            with pytest.warns(UnicodeWarning):
                _ = obj.attic.keys()

            dlisio.set_encodings(['koi8_r'])
            assert 'ДОХЛЫЙ-ПАРАМЕТР' in obj.attic.keys()
        finally:
            f.close()
    finally:
        dlisio.set_encodings(prev_encodings)
Exemple #15
0
# python 3.6.8
# DLISIO v0.1.15
# numpy v1.16.2
# pandas v0.24.1
# lasio v0.23

import os
import pandas as pd
import lasio
import dlisio
# Module-level side effect: sets dlisio's encoding list to latin1 as soon as
# this module is imported.
# NOTE(review): presumably placed here so it applies to every load done by
# this script; confirm, since it sits between the import statements.
dlisio.set_encodings(['latin1'])
import numpy as np


def convert_dlis_to_las(filepath, output_folder_location, null=-999.25):
    filename = os.path.basename(filepath)
    filename = os.path.splitext(filename)[0]
    embedded_files = []
    origins = []
    frame_count = 0

    def df_column_uniquify(df):
        df_columns = df.columns
        new_columns = []
        for item in df_columns:
            counter = 0
            newitem = item
            while newitem in new_columns:
                counter += 1
                newitem = "{}_{}".format(item, counter)
            new_columns.append(newitem)