def test1(data, features):
    # NOTE: dsname is assumed to be defined at module level; this function does not set it.
    # Keep the first 19 feature columns and trim the feature list to match.
    data = data[:, 1:20]
    features = features[0:data.shape[1]]

    # Decompose the data into 3 archetypes; mixt holds the per-row mixture weights.
    arcs, mixt = getArchetypes(data, 3)

    nrfolds = 10
    stats = Stats(name=dsname)

    for train, test, i in kfolded(mixt, nrfolds):
        c = Chrono().start()
        spn = SPN.LearnStructure(train,
                                 featureTypes=["continuous"] * train.shape[1],
                                 row_split_method=Splitting.KmeansRows(),
                                 col_split_method=Splitting.RDCTest(threshold=0.3),
                                 min_instances_slice=100)
        c.end()

        spn.root.validate()
        ll = numpy.mean(spn.root.eval(test))
        print(ll)

        stats.add("HSPN", Stats.LOG_LIKELIHOOD, ll)
        stats.add("HSPN", Stats.TIME, c.elapsed())

    stats.save("stats_" + dsname + ".json")
    print(arcs)
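# Hedged sketch (not part of the original module): `kfolded(mixt, nrfolds)` above is
# assumed to yield (train, test, fold_index) triples. An equivalent generator built on
# sklearn's KFold would look like this; `kfolded_sketch` is a hypothetical name.
from sklearn.model_selection import KFold

def kfolded_sketch(data, nrfolds, seed=0):
    kf = KFold(n_splits=nrfolds, shuffle=True, random_state=seed)
    for i, (train_idx, test_idx) in enumerate(kf.split(data)):
        yield data[train_idx], data[test_idx], i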
def computeNIPS(dimensions):
    dsname, data, features = getNips()

    # Reduce the bag-of-words counts to `dimensions` topic proportions via LDA.
    # (n_topics was renamed n_components in newer scikit-learn releases.)
    lda = LatentDirichletAllocation(n_topics=dimensions, max_iter=50,
                                    learning_method='online', learning_offset=50.,
                                    random_state=0)
    lda.fit(data)
    mixt = lda.transform(data)

    def normalize(data):
        # Nudge exact 0s and 1s off the simplex boundary, then re-normalize each row
        # to sum to 1 so the Dirichlet log-pdf is well defined.
        mixtd = data
        mixtd[mixtd == 1] = mixtd[mixtd == 1] - 0.0000001
        mixtd[mixtd == 0] = mixtd[mixtd == 0] + 0.0000001
        mixtnorm = numpy.sum(mixtd, axis=1)
        mixtd = numpy.divide(mixtd, mixtnorm[:, None])
        return mixtd + 0.0

    mixt = normalize(mixt)
    print(data.shape)

    featureTypes = ["continuous"] * mixt.shape[1]
    domains = [[0, 1]] * mixt.shape[1]
    print(domains)

    @memory.cache
    def learn(data):
        spn = SPN.LearnStructure(data, featureTypes=featureTypes,
                                 row_split_method=Splitting.Gower(),
                                 col_split_method=Splitting.RDCTest(threshold=0.3),
                                 domains=domains, alpha=0.1,
                                 families=['histogram'] * data.shape[1],
                                 min_instances_slice=100)
        return spn

    stats = Stats(name=dsname)

    for train, test, i in kfolded(mixt, 10):
        print(i)

        # Dirichlet baseline: fit concentration parameters on the training fold.
        dirichlet_alphas = dirichlet.mle(train, method='meanprecision', maxiter=1000000)
        print("dirichlet done")
        ll = scipy.stats.dirichlet.logpdf(numpy.transpose(test), alpha=dirichlet_alphas)
        stats.add("DIRICHLET", Stats.LOG_LIKELIHOOD, numpy.sum(ll))
        stats.add("DIRICHLET", Stats.MEAN_LOG_LIKELIHOOD, numpy.mean(ll))

        spn = learn(train)
        print("spn done")
        ll = spn.root.eval(test)
        print(stats.add("SPN", Stats.LOG_LIKELIHOOD, numpy.sum(ll)))
        print(stats.add("SPN", Stats.MEAN_LOG_LIKELIHOOD, numpy.mean(ll)))

    stats.save("results/nips/" + dsname + "-" + str(dimensions) + ".json")
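# Standalone demo (illustration only, not part of the pipeline) of why normalize() above
# nudges values off the simplex boundary: the Dirichlet density is only defined for rows
# strictly inside the simplex, so a 0 or 1 coordinate breaks the log-pdf.
import numpy
import scipy.stats

rows = numpy.array([[0.0, 0.5, 0.5],    # exact 0: outside the Dirichlet support
                    [0.2, 0.3, 0.5]])   # strictly interior: fine
eps = 1e-7
clamped = numpy.clip(rows, eps, 1 - eps)       # nudge off the boundary
clamped /= clamped.sum(axis=1, keepdims=True)  # rows sum to 1 again
print(scipy.stats.dirichlet.logpdf(numpy.transpose(clamped), alpha=[1.0, 1.0, 1.0]))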
def getHydrochemLL():
    dsname, data, features = getHydrochem()
    print(data)
    print(data.shape)

    featureTypes = ["continuous"] * data.shape[1]
    domains = [[0, 1]] * data.shape[1]
    print(domains)

    families = ['piecewise'] * data.shape[1]
    #families = ['histogram'] * data.shape[1]

    #@memory.cache
    def learn(data, families, mininst, alpha, th):
        spn = SPN.LearnStructure(data, featureTypes=featureTypes,
                                 row_split_method=Splitting.Gower(),
                                 col_split_method=Splitting.RDCTest(threshold=th),
                                 domains=domains, alpha=alpha, families=families,
                                 min_instances_slice=mininst)
        return spn

    stats = Stats(name=dsname)
    alll = []

    for train, test, i in kfolded(data, 5):
        # Dirichlet baseline on the raw compositions.
        dirichlet_alphas = dirichlet.mle(train, method='meanprecision', maxiter=100000)
        ll = scipy.stats.dirichlet.logpdf(numpy.transpose(test), alpha=dirichlet_alphas)
        stats.add("DIRICHLET", Stats.LOG_LIKELIHOOD, numpy.sum(ll))
        stats.add("DIRICHLET", Stats.MEAN_LOG_LIKELIHOOD, numpy.mean(ll))

        spn = learn(train, families, 10, 0.1, 0.1)
        ll = spn.root.eval(test)
        alll.append(numpy.mean(ll))
        stats.add("SPN", Stats.LOG_LIKELIHOOD, numpy.sum(ll))
        stats.add("SPN", Stats.MEAN_LOG_LIKELIHOOD, numpy.mean(ll))

    print(numpy.mean(alll))
    stats.save("results/hydrochems/" + dsname + ".json")
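# Hedged sanity check (illustration only) for the Dirichlet-MLE baseline used above,
# assuming the `dirichlet` package (github.com/ericsuh/dirichlet), the same one the
# k-fold loops call: fit on synthetic draws and compare against the true concentration
# parameters.
import numpy
import dirichlet

true_alpha = numpy.array([2.0, 5.0, 3.0])
samples = numpy.random.default_rng(0).dirichlet(true_alpha, size=5000)
est_alpha = dirichlet.mle(samples, method='meanprecision', maxiter=100000)
print(true_alpha)
print(est_alpha)  # should land close to true_alpha for this sample size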
def img_writer(info: dict, pdfm: list, df: PandasDF, job) -> None:
    try:
        out_image = filename_constructor(info=info, folder="mc", mc=True)

        df.plot(lw=3, color='r')
        for pdf in pdfm:
            pdf.plot()
        plt.savefig(out_image)
        plt.close()

        img = "{0}=={1}=={2}=={3}=={4}.png".format(info["broker"], info["symbol"],
                                                   info["period"], info["system"],
                                                   info["direction"])
        stats = Stats.objects.get(broker__slug=info["broker"],
                                  symbol__symbol=info["symbol"],
                                  period__period=info["period"],
                                  system__title=info["system"],
                                  direction=job.direction)
        stats.mc = "https://quantrade.co.uk/static/collector/images/mc/" + img
        stats.save()
        print(colored.green("Image saved {}.".format(img)))
    except Exception as err:
        print(colored.red("img_writer {}".format(err)))
def getAirPollution(dimensions):
    dsname, data, features = getAirQualityUCITimeless()

    # -200 marks missing values in the UCI air-quality data: drop columns with
    # 2000 or more missing entries, then drop rows that still contain any.
    idxmissing = data == -200
    data = data[:, numpy.sum(idxmissing, 0) < 2000]
    idxmissing = data == -200
    data = data[numpy.sum(idxmissing, 1) == 0, :]
    print(data.shape)

    _, mixt = getArchetypes(data, dimensions)
    if mixt is None:
        print("no archetypes", dimensions)
        return

    def normalize(data):
        # Nudge exact 0s and 1s off the simplex boundary, then re-normalize each row
        # to sum to 1 so the Dirichlet log-pdf is well defined.
        mixtd = data
        mixtd[mixtd == 1] = mixtd[mixtd == 1] - 0.0000001
        mixtd[mixtd == 0] = mixtd[mixtd == 0] + 0.0000001
        mixtnorm = numpy.sum(mixtd, axis=1)
        mixtd = numpy.divide(mixtd, mixtnorm[:, None])
        return mixtd + 0.0

    mixt = normalize(mixt)
    print(data.shape)

    featureTypes = ["continuous"] * mixt.shape[1]
    domains = [[0, 1]] * mixt.shape[1]
    print(domains)

    @memory.cache
    def learn(data):
        spn = SPN.LearnStructure(data, featureTypes=featureTypes,
                                 row_split_method=Splitting.Gower(),
                                 col_split_method=Splitting.RDCTest(threshold=0.3),
                                 domains=domains, alpha=0.1,
                                 families=['histogram'] * data.shape[1],
                                 #min_instances_slice=int(data.shape[0]*0.01))
                                 min_instances_slice=200)
        return spn

    stats = Stats(name=dsname)

    for train, test, i in kfolded(mixt, 10):
        print(i)

        # Dirichlet baseline: fit concentration parameters on the training fold.
        dirichlet_alphas = dirichlet.mle(train, method='meanprecision', maxiter=1000000)
        print("dirichlet done")
        ll = scipy.stats.dirichlet.logpdf(numpy.transpose(test), alpha=dirichlet_alphas)
        stats.add("DIRICHLET", Stats.LOG_LIKELIHOOD, numpy.sum(ll))
        stats.add("DIRICHLET", Stats.MEAN_LOG_LIKELIHOOD, numpy.mean(ll))

        spn = learn(train)
        print("spn done")
        ll = spn.root.eval(test)
        print(stats.add("SPN", Stats.LOG_LIKELIHOOD, numpy.sum(ll)))
        print(stats.add("SPN", Stats.MEAN_LOG_LIKELIHOOD, numpy.mean(ll)))

    stats.save("results/airpollution/" + dsname + "-" + str(dimensions) + ".json")
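# Tiny standalone illustration (demo only) of the -200 missing-value filter at the top
# of getAirPollution: first drop columns with too many missing entries, then drop rows
# that still contain any missing value.
import numpy

x = numpy.array([[1.0, -200.0,    3.0],
                 [4.0, -200.0, -200.0],
                 [7.0,    8.0,    9.0]])

missing = x == -200
x = x[:, numpy.sum(missing, 0) < 2]   # keep columns with fewer than 2 missing entries
missing = x == -200
x = x[numpy.sum(missing, 1) == 0, :]  # keep rows with no missing entries
print(x)  # [[1. 3.]
          #  [7. 9.]]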