Exemple #1
0
def test_as_type():
    """Verify that `FExpr.as_type()` and `dt.as_type()` are interchangeable."""
    # The function form and the method form must render to the same string,
    # both for a single column and for an all-columns selector.
    func_single = str(dt.as_type(f.A, int))
    meth_single = str(f.A.as_type(int))
    assert func_single == meth_single

    func_all = str(dt.as_type(f[:], int))
    meth_all = str(f[:].as_type(int))
    assert func_all == meth_all

    frame = dt.Frame({
        'A': ['1.0', '1.0', '2.0', '1.0', '2'],
        'B': [None, '2', '3', '4', '5'],
        'C': [1, 2, 1, 1, 2],
    })

    # Evaluating either form against a frame must give equal results.
    int_via_method = frame[:, f.A.as_type(int)]
    int_via_function = frame[:, dt.as_type(f.A, int)]
    assert_equals(int_via_method, int_via_function)

    float_via_method = frame[:, f[:].as_type(float)]
    float_via_function = frame[:, dt.as_type(f[:], float)]
    assert_equals(float_via_method, float_via_function)
Exemple #2
0
def test_as_type_arguments():
    """Check the `TypeError` raised when `as_type()` gets 0, 1 or 3 arguments."""
    # No arguments at all.
    none_given = (
        r"Function datatable.as_type\(\) requires exactly 2 positional "
        r"arguments, but none were given"
    )
    with pytest.raises(TypeError, match=none_given):
        as_type()

    # Only the column expression, no target type.
    one_given = (
        r"Function datatable.as_type\(\) requires exactly 2 positional "
        r"arguments, but only 1 was given"
    )
    with pytest.raises(TypeError, match=one_given):
        as_type(f.A)

    # One argument too many.
    three_given = (
        r"Function datatable.as_type\(\) takes at most 2 positional "
        r"arguments, but 3 were given"
    )
    with pytest.raises(TypeError, match=three_given):
        as_type(f.A, f.B, f.C)
Exemple #3
0
def test_as_type(target):
    """Casting an integer range column to `target` must match a frame built
    directly with `stype=target`.

    NOTE(review): `target` is presumably supplied by a pytest parametrization
    or fixture that is not visible in this snippet — confirm.
    """
    source = dt.Frame(A=range(5))
    converted = source[:, as_type(f.A, target)]
    reference = dt.Frame(A=range(5), stype=target)
    assert_equals(converted, reference)
Exemple #4
0
def test_as_type_repr():
    """Check `repr()` of `as_type` expressions for named and indexed columns."""
    # Column referenced by name.
    named_column = as_type(f.A, dt.int64)
    assert repr(named_column) == 'FExpr<as_type(f.A, int64)>'

    # Column referenced by position.
    indexed_column = as_type(f[1], dt.str32)
    assert repr(indexed_column) == 'FExpr<as_type(f[1], str32)>'
Exemple #5
0
# Flag every subject ID by whether it also occurs in the other releases.
# The lookups run on the IDs as they are at this point, i.e. before the
# '_original' suffix is stripped further down.

unknownSubjects['foundInFreeze1'] = dt.Frame([
    subject_id in otherFreezeIDs
    for subject_id in unknownSubjects['id'].to_list()[0]
])

unknownSubjects['foundInNovelOmics'] = dt.Frame([
    subject_id in novelOmicsIDs
    for subject_id in unknownSubjects['id'].to_list()[0]
])

# Drop the RD3 '_original' suffix from the subject IDs (before sending).
unknownSubjects['id'] = dt.Frame([
    subject_id.replace('_original', '')
    for subject_id in unknownSubjects['id'].to_list()[0]
])

# Cast the lookup flags to strings. datatable's `update()` is documented to
# modify the frame in place, so the result is not reassigned.
# NOTE(review): `foundInFreeze2` is not created in this fragment — presumably
# it already exists on the frame; confirm.
unknownSubjects[
    :,
    dt.update(
        foundInFreeze1=as_type(f.foundInFreeze1, str),
        foundInFreeze2=as_type(f.foundInFreeze2, str),
        foundInNovelOmics=as_type(f.foundInNovelOmics, str),
    )
]

# Write the flagged subjects to disk.
unknownSubjects.to_csv('data/rd3_freeze1_patch3_missing_subjects.csv')

# Merge files if applicable
# allUnknownSubjects = rbind(
#     fread('data/rd3_freeze1_patch3_missing_subjects.csv'),
#     fread('data/rd3_freeze2_patch1_missing_subjects.csv'),
# )
#
# allUnknownSubjects[
#     (f.foundInFreeze1 == False) &
#     (f.foundInFreeze2 == False) &
Exemple #6
0
def test_astype_type():
    """Casting with a `dt.Type` target must match a frame built with the
    corresponding stype (`dt.float64`)."""
    source = dt.Frame(A=range(10))
    converted = source[:, as_type(f.A, dt.Type.float64)]
    reference = dt.Frame(A=range(10), stype=dt.float64)
    assert_equals(converted, reference)
# Download the latest file and import the contents. Select the columns of
# interest and rename them to align with the RD3 EMX. Set the key as well.

# NOTE(review): the path passed to `fread` is an empty string — presumably the
# real file location was removed from this snippet; fill it in before running.
newData = fread('')[:, [
    'sample_id', 'participant_subject', 'pathological state',
    'tumor cell fraction'
]]

# Map the source column names onto the names used in the rest of the script.
newData.names = {
    'sample_id': 'sampleID',
    'participant_subject': 'subjectID',
    'pathological state': 'pathologicalState',
    'tumor cell fraction': 'percentageTumorCells'
}

# Cast `sampleID` to string. datatable's `update()` is documented to modify
# the frame in place, so the result of the expression is not reassigned.
newData[:, dt.update(sampleID=as_type(f.sampleID, str))]

# Key the frame on the (now string-typed) sample identifier.
newData.key = 'sampleID'

# ~ 1b ~
# Pull the deepwes data from RD3
# Unnest reference attributes and set key

samples = rd3.get(entity='rd3_noveldeepwes_sample',
                  attributes='id,sampleID,subject',
                  batch_size=10000)

# Replace each record's nested `subject` reference with its `subjectID` value
# so the field is flat before the records are turned into a frame.
for row in samples:
    row['subject'] = row['subject']['subjectID']

samples = dt.Frame(samples)