Example #1
def distributed_train(model, train_method, nodes, fts_method, data, num_batches=10,
                      train_parameters=None, **kwargs):
    import dispy, dispy.httpd, datetime

    train_parameters = train_parameters if train_parameters is not None else {}  # avoid a mutable default argument

    batch_save = kwargs.get('batch_save', False)  # save model between batches

    batch_save_interval = kwargs.get('batch_save_interval', 1)

    file_path = kwargs.get('file_path', None)

    cluster, http_server = start_dispy_cluster(train_method, nodes)

    print("[{0: %H:%M:%S}] Distrituted Train Started with {1} CPU's"
          .format(datetime.datetime.now(), get_number_of_cpus(cluster)))

    jobs = []
    n = len(data)
    batch_size = int(n / num_batches)
    bcount = 1
    for ct in range(model.order, n, batch_size):
        if model.is_multivariate:
            ndata = data.iloc[ct - model.order:ct + batch_size]
        else:
            ndata = data[ct - model.order: ct + batch_size]

        tmp_model = fts_method()

        tmp_model.clone_parameters(model)

        job = cluster.submit(tmp_model, ndata, train_parameters)
        job.id = bcount  # associate an ID to identify jobs (if needed later)
        jobs.append(job)

        bcount += 1

    for job in jobs:
        print("[{0: %H:%M:%S}] Processing batch ".format(datetime.datetime.now()) + str(job.id))
        tmp = job()
        if job.status == dispy.DispyJob.Finished and tmp is not None:
            model.merge(tmp)

            if batch_save and (job.id % batch_save_interval) == 0:
                Util.persist_obj(model, file_path)

        else:
            print(job.exception)
            print(job.stdout)

        print("[{0: %H:%M:%S}] Finished batch ".format(datetime.datetime.now()) + str(job.id))

    print("[{0: %H:%M:%S}] Distrituted Train Finished".format(datetime.datetime.now()))

    stop_dispy_cluster(cluster, http_server)

    return model
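
A hedged usage sketch for distributed_train follows; the model construction, the node addresses, and the reuse of the simple_model_train callable mentioned in Examples #4 and #7 are illustrative assumptions, not part of the example above.

# Illustrative only: model setup and node IPs are hypothetical.
from pyFTS.partitioners import Grid
from pyFTS.models import hofts

fs = Grid.GridPartitioner(data=train_data, npart=30)  # fuzzify the training data
model = hofts.HighOrderFTS(partitioner=fs, order=2)

model = distributed_train(model, simple_model_train,
                          ['192.168.0.10', '192.168.0.11'],  # dispy node addresses (placeholders)
                          hofts.HighOrderFTS, train_data,
                          num_batches=10, batch_save=True, file_path='model.pkl')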
Example #2
def train_individual_model(partitioner, train_data, indexer):
    pttr = str(partitioner.__module__).split('.')[-1]
    diff = "_diff" if partitioner.transformation is not None else ""
    _key = "msfts_" + pttr + str(
        partitioner.partitions) + diff + "_" + indexer.name

    print(_key)

    model = cmsfts.ContextualMultiSeasonalFTS(_key, indexer=indexer)
    model.append_transformation(partitioner.transformation)
    model.train(train_data, partitioner.sets, order=1)

    cUtil.persist_obj(model, "models/" + _key + ".pkl")

    return model
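
A minimal usage sketch, assuming a pickled partitioner and indexer in the same naming scheme as Example #6 (both paths and train_data are assumptions):

# Illustrative only: the pickle paths and train_data are hypothetical.
from pyFTS.common import Util

partitioner = Util.load_obj("models/sonda_fs_grid_50.pkl")
indexer = Util.load_obj("models/sonda_ix_m15.pkl")

model = train_individual_model(partitioner, train_data, indexer)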
Example #3
    def train(self, data, **kwargs):
        self.original_max = max(self.indexer.get_data(data))
        self.original_min = min(self.indexer.get_data(data))

        num_cores = multiprocessing.cpu_count()

        pool = {}
        count = 0
        for ix in self.indexers:
            for pt in self.partitioners:
                pool[count] = {'ix': ix, 'pt': pt}
                count += 1

        results = Parallel(n_jobs=num_cores)(
            delayed(train_individual_model)(deepcopy(pool[m]['pt']), data, deepcopy(pool[m]['ix']))
            for m in pool.keys())

        for tmp in results:
            self.append_model(tmp)

        cUtil.persist_obj(self, "models/" + self.name + ".pkl")
Example #4
    def fit(self, ndata, **kwargs):
        """
        Fit the model's parameters based on the training data.

        :param ndata: training time series data
        :param kwargs:

        :keyword num_batches: split the training data in num_batches to save memory during the training process
        :keyword save_model: save final model on disk
        :keyword batch_save: save the model between each batch
        :keyword file_path: path to save the model
        :keyword distributed: boolean, indicates whether the training procedure will be distributed in a dispy cluster
        :keyword nodes: a list with the dispy cluster nodes addresses

        """

        import datetime

        if self.is_multivariate:
            data = ndata
        else:
            data = self.apply_transformations(ndata)

            self.original_min = np.nanmin(data)
            self.original_max = np.nanmax(data)

        if 'partitioner' in kwargs:
            self.partitioner = kwargs.pop('partitioner')

        if not self.is_multivariate and not self.is_wrapper and not self.benchmark_only:
            if self.partitioner is None:
                raise Exception("Fuzzy sets were not provided for the model. Use the 'partitioner' parameter.")

        if 'order' in kwargs:
            self.order = kwargs.pop('order')

        dump = kwargs.get('dump', None)

        num_batches = kwargs.get('num_batches', 10)

        save = kwargs.get('save_model', False)  # save model on disk

        batch_save = kwargs.get('batch_save', False)  # save the model between batches

        file_path = kwargs.get('file_path', None)

        distributed = kwargs.get('distributed', False)

        batch_save_interval = kwargs.get('batch_save_interval', 10)

        if distributed:

            if distributed == 'dispy':
                from pyFTS.distributed import dispy
                nodes = kwargs.get('nodes', False)
                train_method = kwargs.get('train_method',
                                          dispy.simple_model_train)
                dispy.distributed_train(
                    self,
                    train_method,
                    nodes,
                    type(self),
                    data,
                    num_batches, {},
                    batch_save=batch_save,
                    file_path=file_path,
                    batch_save_interval=batch_save_interval)
            elif distributed == 'spark':
                from pyFTS.distributed import spark
                url = kwargs.get('url', 'spark://192.168.0.110:7077')
                app = kwargs.get('app', 'pyFTS')

                spark.distributed_train(self, data, url=url, app=app)
        else:

            if dump == 'time':
                print("[{0: %H:%M:%S}] Start training".format(
                    datetime.datetime.now()))

            if num_batches is not None and not self.is_wrapper:
                n = len(data)
                batch_size = int(n / num_batches)
                bcount = 1

                rng = range(self.order, n, batch_size)

                if dump == 'tqdm':
                    from tqdm import tqdm

                    rng = tqdm(rng)

                for ct in rng:
                    if dump == 'time':
                        print("[{0: %H:%M:%S}] Starting batch ".format(
                            datetime.datetime.now()) + str(bcount))
                    if self.is_multivariate:
                        mdata = data.iloc[ct - self.order:ct + batch_size]
                    else:
                        mdata = data[ct - self.order:ct + batch_size]

                    self.train(mdata, **kwargs)

                    if batch_save:
                        Util.persist_obj(self, file_path)

                    if dump == 'time':
                        print("[{0: %H:%M:%S}] Finish batch ".format(
                            datetime.datetime.now()) + str(bcount))

                    bcount += 1

            else:
                self.train(data, **kwargs)

            if dump == 'time':
                print("[{0: %H:%M:%S}] Finish training".format(
                    datetime.datetime.now()))

        if save:
            Util.persist_obj(self, file_path)
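
A hedged example of calling fit with the keyword options documented above (the model variable and file path are assumptions):

# Illustrative call; 'model', 'train_data' and the file path are hypothetical.
model.fit(train_data,
          num_batches=20,            # train incrementally in 20 batches
          batch_save=True,           # checkpoint the model between batches
          batch_save_interval=5,
          save_model=True,
          file_path='my_model.pkl',
          dump='time')               # print a timestamp per batch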
Example #5
model.fit(train, method='GD', alpha=0.5, momentum=None, iteractions=1)
'''
model.fit(train, method='GA', ngen=15, # number of generations
    mgen=7, # stop after mgen generations without improvement
    npop=15, # number of individuals in the population
    pcruz=.5, # crossover percentage of the population
    pmut=.3, # mutation percentage of the population
    window_size=7000,
    train_rate=.8,
    increment_rate=.2,
    experiments=1
    )
'''

Util.persist_obj(model, 'fcm_fts10c')
'''
model = Util.load_obj('fcm_fts05c')
'''
#forecasts = model.predict(test)

#print(model)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])

ax.plot(test, label='Original')

forecasts = model.predict(test)

# pad the first `order` positions with None so the forecasts align with the test series
for w in np.arange(model.order):
    forecasts.insert(0, None)
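
A hedged continuation of the plotting script above: with the forecasts padded to the model order, they can be drawn on the same axes.

# Continuation sketch; reuses the fig/ax created above.
ax.plot(forecasts, label='Forecasts')
ax.legend()
plt.show()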
Example #6
sonda_treino = sonda[:1051200]  # training split
sonda_teste = sonda[1051200:]   # test split

#res = bchmk.simpleSearch_RMSE(sonda_treino, sonda_teste,
#                              sfts.SeasonalFTS,np.arange(3,30),[1],parameters=1440,
#                              tam=[15,8], plotforecasts=False,elev=45, azim=40,
#                               save=False,file="pictures/sonda_sfts_error_surface", intervals=False)

from pyFTS.common import Util
from pyFTS.models import cmsfts

partitions = ['grid', 'entropy']

indexers = ['m15', 'Mh', 'Mhm15']

for max_part in [40, 50]:
    for part in partitions:
        fs = Util.load_obj("models/sonda_fs_" + part + "_" + str(max_part) + ".pkl")

        for ind in indexers:
            ix = Util.load_obj("models/sonda_ix_" + ind + ".pkl")

            model = cmsfts.ContextualMultiSeasonalFTS(part + " " + ind, ix)

            model.train(sonda_treino, fs)

            Util.persist_obj(
                model, "models/sonda_cmsfts_" + part + "_" + str(max_part) +
                "_" + ind + ".pkl")
Example #7
    def fit(self, ndata, **kwargs):
        """

        :param data: the training time series
        :param kwargs:

        :keyword
        num_batches: split the training data in num_batches to save memory during the training process
        save_model: save final model on disk
        batch_save: save the model between each batch
        file_path: path to save the model
        distributed: boolean, indicate if the training procedure will be distributed in a dispy cluster
        nodes: a list with the dispy cluster nodes addresses

        :return:
        """

        import datetime

        if self.is_multivariate:
            data = ndata
        else:
            data = self.apply_transformations(ndata)

        self.original_min = np.nanmin(data)
        self.original_max = np.nanmax(data)

        if 'sets' in kwargs:
            self.sets = kwargs.pop('sets')

        if 'partitioner' in kwargs:
            self.partitioner = kwargs.pop('partitioner')

        if (self.sets is None or len(self.sets) == 0) and not self.benchmark_only:
            if self.partitioner is not None:
                self.sets = self.partitioner.sets
            else:
                raise Exception("Fuzzy sets were not provided for the model. Use 'sets' parameter or 'partitioner'. ")

        if 'order' in kwargs:
            self.order = kwargs.pop('order')

        dump = kwargs.get('dump', None)

        num_batches = kwargs.get('num_batches', None)

        save = kwargs.get('save_model', False)  # save model on disk

        batch_save = kwargs.get('batch_save', False)  # save the model between batches

        file_path = kwargs.get('file_path', None)

        distributed = kwargs.get('distributed', False)

        batch_save_interval = kwargs.get('batch_save_interval', 10)

        if distributed:
            nodes = kwargs.get('nodes', False)
            train_method = kwargs.get('train_method', Util.simple_model_train)
            Util.distributed_train(self, train_method, nodes, type(self), data, num_batches, {},
                                   batch_save=batch_save, file_path=file_path,
                                   batch_save_interval=batch_save_interval)
        else:

            if dump == 'time':
                print("[{0: %H:%M:%S}] Start training".format(datetime.datetime.now()))

            if num_batches is not None:
                n = len(data)
                batch_size = int(n / num_batches)
                bcount = 1

                rng = range(self.order, n, batch_size)

                if dump == 'tqdm':
                    from tqdm import tqdm

                    rng = tqdm(rng)

                for ct in rng:
                    if dump == 'time':
                        print("[{0: %H:%M:%S}] Starting batch ".format(datetime.datetime.now()) + str(bcount))
                    if self.is_multivariate:
                        mdata = data.iloc[ct - self.order:ct + batch_size]
                    else:
                        mdata = data[ct - self.order : ct + batch_size]

                    self.train(mdata, **kwargs)

                    if batch_save:
                        Util.persist_obj(self, file_path)

                    if dump == 'time':
                        print("[{0: %H:%M:%S}] Finish batch ".format(datetime.datetime.now()) + str(bcount))

                    bcount += 1

            else:
                self.train(data, **kwargs)

            if dump == 'time':
                print("[{0: %H:%M:%S}] Finish training".format(datetime.datetime.now()))

        if save:
            Util.persist_obj(self, file_path)
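
A hedged example of the distributed branch of this fit method (the model variable, node addresses, and file path are placeholders):

# Illustrative call; the node IPs are hypothetical.
model.fit(train_data,
          distributed=True,
          nodes=['192.168.0.10', '192.168.0.11'],
          num_batches=50,
          batch_save=True,
          file_path='model.pkl')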
Example #8
order = 3
nparts = 20

fuzzysets = []
fuzzysets.append(Grid.GridPartitioner(fln_train.glo_avg, nparts))
fuzzysets.append(Grid.GridPartitioner(joi_train.glo_avg, nparts))
fuzzysets.append(Grid.GridPartitioner(sbr_train.glo_avg, nparts))

d = {'fln_glo_avg': fln_train.glo_avg, 'sbr_glo_avg': sbr_train.glo_avg, 'joi_glo_avg': joi_train.glo_avg}
data_train = pd.DataFrame(d)
data_train = data_train.dropna(axis=0, how='any')

model_file = "models/fts/multivariate/mvhofts-" + str(order) + "-" + str(nparts) + ".pkl"

# use a distinct name so the mvhofts module is not shadowed by the model instance
model = mvhofts.MultivariateHighOrderFTS("")
model.train(data_train, fuzzysets, order)
cUtil.persist_obj(model, model_file)


obj = cUtil.load_obj(model_file)
dt = {'fln_glo_avg':fln_test.glo_avg,'sbr_glo_avg':sbr_test.glo_avg,'joi_glo_avg':joi_test.glo_avg}
data_test = pd.DataFrame(dt)
data_test = data_test.dropna(axis=0, how='any')

ret = obj.forecast(data_test)

print("RMSE: " + str(Measures.rmse(list(data_test.fln_glo_avg[order:]), ret[:-1])))
#print(model)
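
The RMSE call above aligns actuals and forecasts by the model order: an order-3 model needs three lagged observations before it can emit its first forecast, and its final output predicts one step past the end of the test set. A small sketch of that alignment, under the assumption that the forecast list has len(data_test) - order + 1 entries:

# Alignment sketch (assumes len(ret) == len(data_test) - order + 1).
actual = list(data_test.fln_glo_avg[order:])  # no forecasts exist for the first `order` points
predicted = ret[:-1]                          # the last forecast points past the test set
assert len(actual) == len(predicted)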