def launchsvdplus(self, input, submit_conf, output=None, npartitions=None, niterations=None, rank=None, minval=None, maxval=None, gamma1=None, gamma2=None, gamma6=None, gamma7=None, storage_level=None, scheduler_options=None, master=None):
    """Submit the SVD++ graph benchmark.

    Any argument left as ``None`` is filled from the 'svd' default settings
    before the application parameters are assembled and submitted.
    """
    defaults = get_default_settings('svd')
    niterations = defaults.get('niterations') if niterations is None else niterations
    rank = defaults.get('rank') if rank is None else rank
    minval = defaults.get('minval') if minval is None else minval
    maxval = defaults.get('maxval') if maxval is None else maxval
    gamma1 = defaults.get('gamma1') if gamma1 is None else gamma1
    gamma2 = defaults.get('gamma2') if gamma2 is None else gamma2
    gamma6 = defaults.get('gamma6') if gamma6 is None else gamma6
    gamma7 = defaults.get('gamma7') if gamma7 is None else gamma7
    storage_level = defaults.get('storage_level') if storage_level is None else storage_level
    output = defaults.get('output') if output is None else output
    npartitions = defaults.get('npartitions') if npartitions is None else npartitions
    params = [input, output, npartitions, niterations, rank, minval, maxval, gamma1, gamma2, gamma6, gamma7, storage_level]
    jar_path = self.home_directory + "SVDPlusPlus/target/SVDPlusPlusApp-1.0.jar"
    self.submitter.submit(class_in_jar="src.main.scala.SVDPlusPlusApp", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchpca(self, input, submit_conf, dimensions=None, scheduler_options=None, master=None):
    """Submit the PCA benchmark; ``dimensions`` falls back to the 'pca' defaults."""
    defaults = get_default_settings('pca')
    dimensions = defaults.get('dimensions') if dimensions is None else dimensions
    params = [input, dimensions]
    jar_path = self.home_directory + "PCA/target/PCAApp-1.0.jar"
    self.submitter.submit(class_in_jar="PCA.src.main.scala.PCAApp", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchgeneratetextfile(self, output, size, npartitions, submit_conf, scheduler_options=None, master=None):
    """Submit the random-text generator; point count is derived from ``size``."""
    npoints = get_npoints_for_size('words', size)
    # Defaults are looked up as in the sibling launchers, though none are consumed here.
    defaults = get_default_settings('generatetext')
    params = [output, npoints, str(npartitions)]
    jar_path = self.home_directory + "BenchMark-1.0-SNAPSHOT.jar"
    self.submitter.submit(class_in_jar="com.abrandon.upm.GenerateRandomText", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchngrams(self, input, submit_conf, output=None, scheduler_options=None, master=None):
    """Submit the n-grams benchmark; ``output`` falls back to the 'ngrams' defaults."""
    defaults = get_default_settings('ngrams')
    output = defaults.get('output') if output is None else output
    params = [input, output]
    jar_path = self.home_directory + "BenchMark-1.0-SNAPSHOT.jar"
    self.submitter.submit(class_in_jar="com.abrandon.upm.NGramsExample", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchgeneratedectreefile(self, output, size, npartitions, submit_conf, nfeatures=None, scheduler_options=None, master=None):
    """Submit the decision-tree data generator.

    Point count comes from ``size``; ``nfeatures`` falls back to the
    'generatedectree' defaults.
    """
    npoints = get_npoints_for_size('decisiontree', size)
    defaults = get_default_settings('generatedectree')
    nfeatures = defaults.get('nfeatures') if nfeatures is None else nfeatures
    params = [output, npoints, nfeatures, str(npartitions)]
    jar_path = self.home_directory + "BenchMark-1.0-SNAPSHOT.jar"
    # NOTE(review): this launcher submits the SVM data-generator class — confirm
    # GenerateSVMData is indeed the intended generator for decision-tree input.
    self.submitter.submit(class_in_jar="com.abrandon.upm.GenerateSVMData", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchgeneratepcafile(self, output, size, npartitions, submit_conf, nfeatures=None, scheduler_options=None, master=None):
    """Submit the PCA data generator; ``nfeatures`` falls back to 'generatepca' defaults."""
    npoints = get_npoints_for_size('pca', size)
    defaults = get_default_settings('generatepca')
    nfeatures = defaults.get('nfeatures') if nfeatures is None else nfeatures
    params = [output, npoints, nfeatures, str(npartitions)]
    jar_path = self.home_directory + "PCA/target/PCAApp-1.0.jar"
    self.submitter.submit(class_in_jar="PCA.src.main.scala.PCADataGen", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchwordcount(self, input, submit_conf, output=None, npartitions=None, scheduler_options=None, master=None):
    """Submit the WordCount benchmark; unset args fall back to 'wordcount' defaults."""
    defaults = get_default_settings('wordcount')
    output = defaults.get('output') if output is None else output
    npartitions = defaults.get('npartitions') if npartitions is None else npartitions
    params = [input, output, str(npartitions)]
    jar_path = self.home_directory + "BenchMark-1.0-SNAPSHOT.jar"
    self.submitter.submit(class_in_jar="com.abrandon.upm.WordCount", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchstronglyconnectedcomponent(self, input, submit_conf, output=None, npartitions=None, scheduler_options=None, master=None):
    """Submit the strongly-connected-component graph benchmark.

    Unset args fall back to the 'stronglyconnected' defaults.
    """
    defaults = get_default_settings('stronglyconnected')
    output = defaults.get('output') if output is None else output
    npartitions = defaults.get('npartitions') if npartitions is None else npartitions
    params = [input, output, npartitions]
    jar_path = self.home_directory + "StronglyConnectedComponent/target/StronglyConnectedComponentApp-1.0.jar"
    self.submitter.submit(class_in_jar="src.main.scala.StronglyConnectedComponentApp", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchlinearregression(self, input, submit_conf, output=None, max_iterations=None, scheduler_options=None, master=None):
    """Submit the linear-regression benchmark; unset args fall back to 'linearregression' defaults."""
    defaults = get_default_settings('linearregression')
    max_iterations = defaults.get('max_iterations') if max_iterations is None else max_iterations
    output = defaults.get('output') if output is None else output
    params = [input, output, max_iterations]
    jar_path = self.home_directory + "LinearRegression/target/LinearRegressionApp-1.0.jar"
    self.submitter.submit(class_in_jar="LinearRegression.src.main.java.LinearRegressionApp", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchgenerategraphfile(self, output, size, npartitions, submit_conf, scheduler_options=None, master=None, mu=None, sigma=None):
    """Submit the graph data generator.

    Point count comes from ``size``; ``mu``/``sigma`` fall back to the
    'generategraph' defaults.
    """
    npoints = get_npoints_for_size('graph', size)
    defaults = get_default_settings('generategraph')
    mu = defaults.get('mu') if mu is None else mu
    sigma = defaults.get('sigma') if sigma is None else sigma
    params = [output, npoints, str(npartitions), mu, sigma]
    jar_path = self.home_directory + "common/target/Common-1.0.jar"
    self.submitter.submit(class_in_jar="DataGen.src.main.scala.GraphDataGen", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchsvm(self, input, submit_conf, npartitions=None, niterations=None, scheduler_options=None, master=None):
    """Submit the SVM benchmark; unset args fall back to the 'svm' defaults."""
    defaults = get_default_settings('svm')
    niterations = defaults.get('niterations') if niterations is None else niterations
    npartitions = defaults.get('npartitions') if npartitions is None else npartitions
    params = [input, niterations, str(npartitions)]
    jar_path = self.home_directory + "BenchMark-1.0-SNAPSHOT.jar"
    self.submitter.submit(class_in_jar="com.abrandon.upm.SupportVectorMachine", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchgroupby(self, submit_conf, num_kvpairs=None, value_size=None, nmappers=None, scheduler_options=None, master=None):
    """Submit the GroupBy benchmark; unset args fall back to the 'groupby' defaults."""
    defaults = get_default_settings('groupby')
    num_kvpairs = defaults.get('num_kvpairs') if num_kvpairs is None else num_kvpairs
    value_size = defaults.get('value_size') if value_size is None else value_size
    nmappers = defaults.get('nmappers') if nmappers is None else nmappers
    params = [num_kvpairs, value_size, nmappers]
    jar_path = self.home_directory + "BenchMark-1.0-SNAPSHOT.jar"
    self.submitter.submit(class_in_jar="com.abrandon.upm.GroupByTest", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchtrianglecount(self, input, submit_conf, output=None, npartitions=None, storage_level=None, scheduler_options=None, master=None):
    """Submit the triangle-count graph benchmark; unset args fall back to 'trianglecount' defaults."""
    defaults = get_default_settings('trianglecount')
    storage_level = defaults.get('storage_level') if storage_level is None else storage_level
    output = defaults.get('output') if output is None else output
    npartitions = defaults.get('npartitions') if npartitions is None else npartitions
    params = [input, output, npartitions, storage_level]
    jar_path = self.home_directory + "TriangleCount/target/TriangleCountApp-1.0.jar"
    self.submitter.submit(class_in_jar="src.main.scala.triangleCountApp", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchshortestpaths(self, input, submit_conf, output=None, npartitions=None, numv=None, scheduler_options=None, master=None):
    """Submit the shortest-paths graph benchmark; unset args fall back to 'shortestpaths' defaults."""
    defaults = get_default_settings('shortestpaths')
    numv = defaults.get('numv') if numv is None else numv
    output = defaults.get('output') if output is None else output
    npartitions = defaults.get('npartitions') if npartitions is None else npartitions
    params = [input, output, npartitions, numv]
    jar_path = self.home_directory + "ShortestPaths/target/ShortestPathsApp-1.0.jar"
    self.submitter.submit(class_in_jar="src.main.scala.ShortestPathsApp", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchgeneratelinearregfile(self, output, size, npartitions, submit_conf, nfeatures=None, eps=None, probone=None, scheduler_options=None, master=None):
    """Submit the linear-regression data generator.

    Point count comes from ``size``; ``nfeatures``/``eps``/``probone`` fall
    back to the 'generatelinearreg' defaults.
    """
    npoints = get_npoints_for_size('linear', size)
    defaults = get_default_settings('generatelinearreg')
    nfeatures = defaults.get('nfeatures') if nfeatures is None else nfeatures
    eps = defaults.get('eps') if eps is None else eps
    probone = defaults.get('probone') if probone is None else probone
    params = [output, npoints, nfeatures, eps, probone, str(npartitions)]
    jar_path = self.home_directory + "LinearRegression/target/LinearRegressionApp-1.0.jar"
    self.submitter.submit(class_in_jar="LinearRegression.src.main.java.LinearRegressionDataGen", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchgeneratekmeansfile(self, output, size, npartitions, submit_conf, nclusters=None, ndimensions=None, scaling=None, scheduler_options=None, master=None):
    """Submit the k-means data generator.

    Point count comes from ``size``; ``nclusters``/``ndimensions``/``scaling``
    fall back to the 'generatekmeans' defaults.
    """
    npoints = get_npoints_for_size('kmeans', size)
    defaults = get_default_settings('generatekmeans')
    nclusters = defaults.get('nclusters') if nclusters is None else nclusters
    ndimensions = defaults.get('ndimensions') if ndimensions is None else ndimensions
    scaling = defaults.get('scaling') if scaling is None else scaling
    params = [output, npoints, nclusters, ndimensions, scaling, str(npartitions)]
    jar_path = self.home_directory + "KMeans/target/KMeansApp-1.0.jar"
    self.submitter.submit(class_in_jar="kmeans_min.src.main.scala.KmeansDataGen", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchkmeans(self, input, submit_conf, output=None, npartitions=None, nclusters=None, max_iterations=None, num_run=None, scheduler_options=None, master=None):
    """Submit the k-means benchmark; unset args fall back to the 'kmeans' defaults."""
    defaults = get_default_settings('kmeans')
    nclusters = defaults.get('nclusters') if nclusters is None else nclusters
    max_iterations = defaults.get('max_iterations') if max_iterations is None else max_iterations
    num_run = defaults.get('num_run') if num_run is None else num_run
    output = defaults.get('output') if output is None else output
    npartitions = defaults.get('npartitions') if npartitions is None else npartitions
    params = [input, output, nclusters, max_iterations, num_run, str(npartitions)]
    jar_path = self.home_directory + "KMeans/target/KMeansApp-1.0.jar"
    self.submitter.submit(class_in_jar="KmeansApp", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchpagerank(self, input, submit_conf, output=None, npartitions=None, max_iterations=None, tolerance=None, reset_prob=None, storage_level=None, scheduler_options=None, master=None):
    """Submit the PageRank graph benchmark; unset args fall back to 'pagerank' defaults."""
    defaults = get_default_settings('pagerank')
    max_iterations = defaults.get('max_iterations') if max_iterations is None else max_iterations
    tolerance = defaults.get('tolerance') if tolerance is None else tolerance
    reset_prob = defaults.get('reset_prob') if reset_prob is None else reset_prob
    storage_level = defaults.get('storage_level') if storage_level is None else storage_level
    output = defaults.get('output') if output is None else output
    npartitions = defaults.get('npartitions') if npartitions is None else npartitions
    params = [input, output, npartitions, max_iterations, tolerance, reset_prob, storage_level]
    jar_path = self.home_directory + "PageRank/target/PageRankApp-1.0.jar"
    self.submitter.submit(class_in_jar="src.main.scala.pagerankApp", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)
def launchdecisiontrees(self, input, submit_conf, output=None, nclasses=None, impurity=None, max_depth=None, max_bins=None, mode=None, scheduler_options=None, master=None):
    """Submit the decision-tree benchmark; unset args fall back to 'decisiontree' defaults."""
    defaults = get_default_settings('decisiontree')
    nclasses = defaults.get('nclasses') if nclasses is None else nclasses
    impurity = defaults.get('impurity') if impurity is None else impurity
    max_depth = defaults.get('max_depth') if max_depth is None else max_depth
    max_bins = defaults.get('max_bins') if max_bins is None else max_bins
    mode = defaults.get('mode') if mode is None else mode
    output = defaults.get('output') if output is None else output
    params = [input, output, nclasses, impurity, max_depth, max_bins, mode]
    jar_path = self.home_directory + "DecisionTree/target/DecisionTreeApp-1.0.jar"
    self.submitter.submit(class_in_jar="DecisionTree.src.main.java.DecisionTreeApp", class_params=params, jar=jar_path, master=master, submit_conf=submit_conf, scheduler_options=scheduler_options)