def infert_ds(label_index, label_name='Label'):
    """Return a ``FileDataStream`` over ``infert_file`` with a custom schema.

    The schema string declares ``id`` (column 0) and ``education``
    (column 1) as ``TX`` columns, a single ``R4`` column named
    ``label_name`` at position ``label_index``, and an ``R4`` column
    range named ``Features`` spanning ``label_index + 1`` through 8.

    :param label_index: position of the label column in the file; the
        ``Features`` range starts at the next position.
    :param label_name: name given to the label column in the schema.
    :return: the configured ``FileDataStream``.
    """
    schema_template = (
        'sep=, col=id:TX:0 col=education:TX:1 col={}:R4:{} '
        'col=Features:R4:{}-8 header=+'
    )
    first_feature = label_index + 1
    file_schema = schema_template.format(
        label_name, label_index, first_feature)

    stream = FileDataStream(infert_file, schema=file_schema)

    # The role is only assigned explicitly when the column is not
    # literally called 'Label'.
    needs_explicit_role = label_name != 'Label'
    if needs_explicit_role:
        stream._set_role(Role.Label, label_name)
    return stream
def data_wt_rename(self, label_name, group_id, features):
    """Return a ``FileDataStream`` over ``gen_tickettrain`` with renamed columns.

    The schema string declares column 0 as ``R4`` under ``label_name``,
    column 1 as ``TX`` under ``group_id``, and columns 3-5 as ``R4``
    under ``features``.

    :param label_name: schema name for column 0.
    :param group_id: schema name for column 1.
    :param features: schema name for the column range 3-5.
    :return: the configured ``FileDataStream``.
    """
    source_path = get_dataset("gen_tickettrain").as_filepath()

    schema_template = (
        'sep=, col={label}:R4:0 col={group_id}:TX:1 '
        'col={features}:R4:3-5'
    )
    file_schema = schema_template.format(
        label=label_name,
        group_id=group_id,
        features=features,
    )

    stream = FileDataStream(source_path, schema=file_schema)

    # The role is only assigned explicitly when the column is not
    # literally called 'Label'.
    needs_explicit_role = label_name != 'Label'
    if needs_explicit_role:
        stream._set_role(Role.Label, label_name)
    return stream
def test_linear_file_role(self):
    """Fit a linear classifier on a file stream whose label is set via role.

    Builds a two-step pipeline (one-hot vectorizer applied to
    ``categorical_columns``, then ``FastLinearBinaryClassifier``), fits it
    on ``train_file`` after assigning the 'Label' role to
    ``label_column``, predicts on ``test_file`` and hands the predictions
    to ``check_accuracy``.
    """
    steps = [
        OneHotVectorizer() << categorical_columns,
        # Single thread and no shuffling, as configured by this test.
        FastLinearBinaryClassifier(train_threads=1, shuffle=False),
    ]
    pipeline = Pipeline(steps)

    training_data = FileDataStream(train_file, schema=file_schema)
    training_data._set_role('Label', label_column)
    pipeline.fit(training_data)

    # The test stream gets no explicit role assignment.
    predictions = pipeline.predict(
        FileDataStream(test_file, schema=file_schema))

    # 0.65 is presumably the minimum acceptable accuracy -- confirm
    # against check_accuracy's definition.
    check_accuracy(test_file, label_column, predictions, 0.65)