def test_as_type():
    """Check that ``dt.as_type(expr, t)`` and ``expr.as_type(t)`` agree.

    The function spelling and the method spelling are compared twice: first
    by the string form of the unevaluated expressions, then by the frames
    they produce when applied to a small frame.
    """
    # Single-column selector: both spellings must stringify identically.
    single_via_function = dt.as_type(f.A, int)
    single_via_method = f.A.as_type(int)
    assert str(single_via_function) == str(single_via_method)

    # All-columns selector: same check.
    all_via_function = dt.as_type(f[:], int)
    all_via_method = f[:].as_type(int)
    assert str(all_via_function) == str(all_via_method)

    # Two string columns (one containing a missing value) and one int column.
    DT = dt.Frame({
        'A': ['1.0', '1.0', '2.0', '1.0', '2'],
        'B': [None, '2', '3', '4', '5'],
        'C': [1, 2, 1, 1, 2],
    })

    # Evaluated results must match as well.
    method_result = DT[:, f.A.as_type(int)]
    function_result = DT[:, dt.as_type(f.A, int)]
    assert_equals(method_result, function_result)

    method_result = DT[:, f[:].as_type(float)]
    function_result = DT[:, dt.as_type(f[:], float)]
    assert_equals(method_result, function_result)
def test_as_type_arguments():
    """Verify that ``as_type()`` rejects a wrong number of positional args.

    Calling it with zero, one, or three positional arguments must raise
    ``TypeError`` whose message matches the corresponding pattern below.
    """
    # Each entry: (positional arguments to pass, regex the error must match).
    cases = [
        (
            (),
            r"Function datatable.as_type\(\) requires exactly 2 positional "
            r"arguments, but none were given",
        ),
        (
            (f.A,),
            r"Function datatable.as_type\(\) requires exactly 2 positional "
            r"arguments, but only 1 was given",
        ),
        (
            (f.A, f.B, f.C),
            r"Function datatable.as_type\(\) takes at most 2 positional "
            r"arguments, but 3 were given",
        ),
    ]
    for args, pattern in cases:
        with pytest.raises(TypeError, match=pattern):
            as_type(*args)
def test_as_type(target):
    """Check that casting an integer range column to ``target`` matches a
    frame constructed directly with ``stype=target``.

    ``target`` is supplied from outside this function (presumably a pytest
    parametrization or fixture that is not visible here -- TODO confirm).
    """
    DT = dt.Frame(A=range(5))
    converted = DT[:, as_type(f.A, target)]
    expected = dt.Frame(A=range(5), stype=target)
    assert_equals(converted, expected)
def test_as_type_repr():
    """Check the ``repr`` of ``as_type`` expressions.

    Covers a by-name column selector with ``dt.int64`` and a by-index
    selector with ``dt.str32``.
    """
    by_name = as_type(f.A, dt.int64)
    assert repr(by_name) == 'FExpr<as_type(f.A, int64)>'

    by_index = as_type(f[1], dt.str32)
    assert repr(by_index) == 'FExpr<as_type(f[1], str32)>'
# Check whether each unknown subject ID also exists in the other releases.
# The ID column is extracted once; the two flag columns added below do not
# modify it, so all three comprehensions see the same values.
# NOTE(review): the membership checks use the IDs as they are *before* the
# '_original' suffix is stripped further down -- keep this ordering.
_unknownIDs = unknownSubjects['id'].to_list()[0]

unknownSubjects['foundInFreeze1'] = dt.Frame(
    [subjectID in otherFreezeIDs for subjectID in _unknownIDs])

unknownSubjects['foundInNovelOmics'] = dt.Frame(
    [subjectID in novelOmicsIDs for subjectID in _unknownIDs])

# Remove the RD3 '_original' suffix from subject IDs (before sending).
# NOTE(review): str.replace removes every occurrence of '_original', not
# only a trailing one -- confirm IDs never contain it elsewhere.
unknownSubjects['id'] = dt.Frame(
    [subjectID.replace('_original', '') for subjectID in _unknownIDs])

# Cast the flag columns to strings before writing the file.
# NOTE(review): 'foundInFreeze2' is not created in this section; presumably
# it already exists on the frame -- verify.
unknownSubjects[:, dt.update(
    foundInFreeze1=as_type(f.foundInFreeze1, str),
    foundInFreeze2=as_type(f.foundInFreeze2, str),
    foundInNovelOmics=as_type(f.foundInNovelOmics, str),
)]

unknownSubjects.to_csv('data/rd3_freeze1_patch3_missing_subjects.csv')

# Merge files if applicable
# allUnknownSubjects = rbind(
#     fread('data/rd3_freeze1_patch3_missing_subjects.csv'),
#     fread('data/rd3_freeze2_patch1_missing_subjects.csv'),
# )
#
# allUnknownSubjects[
#     (f.foundInFreeze1 == False) &
#     (f.foundInFreeze2 == False) &
def test_astype_type():
    """Check that ``as_type`` accepts a ``dt.Type`` object as its target.

    Casting an integer range column with ``dt.Type.float64`` must give the
    same frame as constructing it with ``stype=dt.float64``.
    """
    DT = dt.Frame(A=range(10))
    converted = DT[:, as_type(f.A, dt.Type.float64)]
    expected = dt.Frame(A=range(10), stype=dt.float64)
    assert_equals(converted, expected)
# Download the latest file and import the contents. Select the columns of
# interest and rename them to align with the RD3 EMX. Set the key as well.
# The mapping is source column -> RD3 EMX name; its keys (in order) are the
# columns selected from the imported file.
_columnMap = {
    'sample_id': 'sampleID',
    'participant_subject': 'subjectID',
    'pathological state': 'pathologicalState',
    'tumor cell fraction': 'percentageTumorCells',
}

newData = fread('')[:, list(_columnMap)]
newData.names = _columnMap

# Cast sampleID to a string column, then use it as the frame key.
newData[:, dt.update(sampleID=as_type(f.sampleID, str))]
newData.key = 'sampleID'

# ~ 1b ~
# Pull the deepwes data from RD3
# Unnest reference attributes and set key
samples = rd3.get(
    entity='rd3_noveldeepwes_sample',
    attributes='id,sampleID,subject',
    batch_size=10000,
)

# Replace each nested subject reference with its subjectID value (in place).
for row in samples:
    row['subject'] = row['subject']['subjectID']

samples = dt.Frame(samples)