def __new__(cls, data, address='localhost:9465', batch=100, max_iter=100):
    """Fit an incremental PCA model on batches drawn from ``data``.

    ``data`` is first passed through a ``Continuize`` preprocessor
    (``multinomial_treatment=Continuize.Remove``,
    ``normalize_continuous=None``).  An initial model is fitted on one batch
    and then refined with ``partial_fit`` on freshly sampled batches; the
    model is handed to ``save_state`` after every fit.

    Args:
        data: Table-like object providing ``approx_len``,
            ``sample_percentage``, ``download_data``, ``domain`` and ``X``
            (presumably an SQL-backed table -- confirm against callers).
        address: Not used anywhere in this method body.
        batch: Desired batch size; converted to a sampling percentage of
            ``data.approx_len()``.
        max_iter: Maximum number of refinement rounds when sampling is used.

    Returns:
        The fitted model (not an instance of ``cls``).
    """
    from orangecontrib.remote import aborted, save_state
    import Orange.data.sql.table

    def materialize(source):
        # Pull the rows locally, then rebuild a table that keeps only the
        # attribute columns of the source domain.
        source.download_data(1000000)
        attrs_only = Orange.data.Domain(source.domain.attributes)
        return Orange.data.Table.from_numpy(attrs_only, source.X)

    continuizer = Continuize(multinomial_treatment=Continuize.Remove,
                             normalize_continuous=None)
    data = continuizer(data)
    learner = Orange.projection.IncrementalPCA()

    percent = batch / data.approx_len() * 100
    sampling = percent < 100

    # Initial fit: a sampled batch, or the whole table when one batch
    # already covers it.
    if sampling:
        first = data.sample_percentage(percent, no_cache=True)
    else:
        first = data
    model = learner(materialize(first))
    save_state(model)

    # Refinement rounds only make sense when we are actually sampling.
    rounds = max_iter if sampling else 0
    for step in range(rounds):
        chunk = data.sample_percentage(percent, no_cache=True)
        model.partial_fit(materialize(chunk))
        model.iteration = step
        save_state(model)
        if aborted():
            break
    return model
def __new__(cls, data, address='localhost:9465', batch=100, max_iter=100):
    """Build an incremental PCA model from sampled batches of ``data``.

    ``data`` is run through ``Continuize`` configured with
    ``multinomial_treatment=Continuize.Remove``.  If a batch is smaller than
    the table (by ``approx_len``), the model is fitted on one sample and
    then updated with up to ``max_iter`` further samples; otherwise it is
    fitted once on the whole table.  ``save_state`` receives the model after
    each fit.

    Args:
        data: Table-like object exposing ``approx_len``,
            ``sample_percentage``, ``download_data``, ``domain`` and ``X``
            (presumably an SQL-backed table -- confirm against callers).
        address: Not referenced in this method body.
        batch: Target batch size, expressed to the sampler as a percentage
            of ``data.approx_len()``.
        max_iter: Upper bound on the number of ``partial_fit`` rounds.

    Returns:
        The fitted model (not an instance of ``cls``).
    """
    from orangecontrib.remote import aborted, save_state
    import Orange.data.sql.table

    def as_numpy_table(table):
        # Download the rows, then keep only the attribute part of the domain.
        table.download_data(1000000)
        return Orange.data.Table.from_numpy(
            Orange.data.Domain(table.domain.attributes), table.X)

    preprocess = Continuize(multinomial_treatment=Continuize.Remove)
    data = preprocess(data)
    projector = Orange.projection.IncrementalPCA()
    percent = batch / data.approx_len() * 100

    if percent < 100:
        # Sampling path: initial fit on one batch, then incremental updates.
        seed_batch = data.sample_percentage(percent, no_cache=True)
        model = projector(as_numpy_table(seed_batch))
        save_state(model)
        for round_no in range(max_iter):
            next_batch = data.sample_percentage(percent, no_cache=True)
            model.partial_fit(as_numpy_table(next_batch))
            model.iteration = round_no
            save_state(model)
            if aborted():
                break
    else:
        # One batch covers everything: a single fit on the full table.
        model = projector(as_numpy_table(data))
        save_state(model)
    return model
def __new__(cls, data, batch=100, max_iter=100):
    """Fit an incremental PCA model on batches sampled from ``data``.

    ``data`` is passed through ``Continuize`` configured with
    ``multinomial_treatment=Continuize.Remove``.  Up to ``max_iter`` batches
    are then sampled; each non-empty batch is downloaded, reduced to its
    attribute columns and fed to ``partial_fit``.  After every update the
    model gets its ``iteration`` attribute set and is handed to
    ``save_state``.

    The loop stops early when ``aborted()`` is true or when the sampler
    handed back ``data`` itself, i.e. a single batch already is the whole
    table and further rounds would only refit identical data.

    Args:
        data: Table-like object providing ``approx_len``,
            ``sample_percentage``, ``download_data``, ``domain`` and ``X``
            (presumably an SQL-backed table -- confirm against callers).
        batch: Desired batch size; converted to a sampling percentage of
            ``data.approx_len()``.
        max_iter: Maximum number of sampling rounds.

    Returns:
        The object returned by the last ``partial_fit`` call, or the
        untouched ``IncrementalPCA`` instance if every sample was empty.

    Raises:
        ZeroDivisionError: If ``data.approx_len()`` returns 0.
    """
    continuizer = Continuize(multinomial_treatment=Continuize.Remove)
    data = continuizer(data)
    model = Orange.projection.IncrementalPCA()
    percent = batch / data.approx_len() * 100

    for i in range(max_iter):
        sample = data.sample_percentage(percent, no_cache=True)

        # Record this before building the in-memory copy below.  The
        # original compared the *converted* table against ``data``, which
        # can never be the same object, so the early exit never fired.
        covers_all = sample is data

        if not sample:
            if covers_all:
                # The whole table is empty; resampling cannot change that.
                break
            continue

        sample.download_data(1000000)
        # Keep only the attribute columns of the sampled domain.
        batch_table = Orange.data.Table.from_numpy(
            Orange.data.Domain(sample.domain.attributes), sample.X)

        model = model.partial_fit(batch_table)
        model.iteration = i
        save_state(model)

        if aborted() or covers_all:
            break
    return model