Example #1
    def __init__(self, *args, **kwargs):
        # load the defaults
        super(Settings, self).update(defaults)

        # override with the settings file
        path = kwargs.get('settings_file') or self['settings_file']
        if path and os.path.exists(path):
            try:
                import yaml
                with open(path) as f:
                    self.update(yaml.safe_load(f))
            except Exception:
                pass  # the settings file is optional; ignore load failures

        # final overrides
        super(Settings, self).update(overrides)
        super(Settings, self).__init__(*args, **kwargs)

        # set up ddfs and disco
        if not self['server'].startswith('disco://'):
            self['server'] = 'disco://' + self['server']

        if 'ddfs' not in self:
            self['ddfs'] = DDFS(self['server'])
        self['server'] = Disco(self['server'])

        # set up worker
        if 'worker' not in self:
            worker_mod, _, worker_class = self['worker_class'].rpartition('.')
            mod = __import__(worker_mod, {}, {}, worker_mod)
            self['worker'] = getattr(mod, worker_class)()
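The constructor above layers configuration in increasing precedence: class defaults, then the YAML settings file, then module-level overrides, then the constructor arguments. A minimal sketch of that precedence chain, assuming (as the calls above suggest) that Settings subclasses dict and that defaults and overrides are module-level dicts:

# Sketch only: `defaults` and `overrides` are assumed module-level dicts.
defaults = {'server': 'localhost:8989', 'settings_file': None}
overrides = {}

class Settings(dict):
    def __init__(self, *args, **kwargs):
        self.update(defaults)                  # 1. lowest precedence
        # 2. ...the settings file would be merged here...
        self.update(overrides)                 # 3. explicit overrides
        dict.__init__(self, *args, **kwargs)   # 4. highest precedence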
Example #2
def get_disco_handle(server):
    from disco.core import Disco
    from disco.ddfs import DDFS

    if server and not server.startswith('disco://'):
        server = 'disco://' + server

    return Disco(server), DDFS(server)
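A possible call of the helper above; the master address is an assumption:

# Hypothetical usage; substitute your own Disco master address.
disco, ddfs = get_disco_handle('localhost:8989')
print disco.master  # should carry the 'disco://' prefix added above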
Example #3
def submit(master, jobpack):
    import json
    from disco.settings import DiscoSettings
    from disco.core import Disco
    settings = DiscoSettings()
    dmaster = Disco(master)
    print "Submitting job to", master
    status, response = json.loads(dmaster.request('/disco/job/new', jobpack))
    if status != 'ok':
        errmsg('Failed to start job. Server replied: %s' % response)
    print response
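submit posts a pre-built jobpack to the master's /disco/job/new endpoint and decodes the (status, response) pair the server returns as JSON. A hedged sketch of calling it; the jobpack file name is hypothetical:

# Hypothetical usage: submit a jobpack that was serialized to disk earlier.
with open('wordcount.jobpack', 'rb') as f:
    submit('disco://localhost:8989', f.read())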
Example #4
    """
    Predict the closest clusters for the datapoints in input.
    """
    job = master.new_job(name='kcluster_predict',
                         input=input,
                         map_reader=map_reader,
                         map=predict_map,
                         params=Params(centers=centers, **center),
                         nr_reduces=0)

    return job.wait()


if __name__ == '__main__':
    parser = OptionParser(usage='%prog [options] inputs')
    parser.add_option('--disco-master',
                      default=getenv('DISCO_MASTER'),
                      help='Disco master')
    parser.add_option('--iterations', default=10, help='Number of iterations')
    parser.add_option('--clusters', default=10, help='Number of clusters')

    (options, input) = parser.parse_args()
    master = Disco(options.disco_master)

    centers = estimate(master, input, mean_point_center, int(options.clusters),
                       int(options.iterations))

    res = predict(master, input, mean_point_center, centers)

    print '\n'.join(res)
Example #5
 def disco(self):
     return Disco(self.disco_master_url)
Example #6
 def disco(self):
     from disco.core import Disco
     return Disco(self.settings['DISCO_MASTER'])
Example #7
    input = inputs or [
        maybe_list(line.split()) for line in fileinput.input(inputs)
    ]
    job = reify(jobclass)(program.disco, name)

    try:
        params = job.params
    except AttributeError:
        params = Params()
    params.__dict__.update(**dict(program.options.params))

    job.run(input=input, **program.option_parser.jobdict)
    print job.name


@Disco.command
def wait(program, jobname):
    """Usage: jobname

    Wait for the named job to complete.
    """
    program.disco.wait(jobname)


if __name__ == '__main__':
    Disco(option_parser=DiscoOptionParser()).main()

    # Workaround for "disco test" in Python2.5 which doesn't shutdown the
    # test_server thread properly.
    sys.exit(0)  # XXX still needed?
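Commands in this CLI are plain functions registered with the @Disco.command decorator and receive the program object plus their positional arguments. A hypothetical extra command in the same style, reusing the wait call shown above:

@Disco.command
def urls(program, jobname):
    """Usage: jobname

    Wait for the named job and print its result urls.
    (Hypothetical example command, modeled on wait above.)
    """
    for url in program.disco.wait(jobname):
        print url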
Example #8
File: cli.py Project: tpeng/disco
 def disco(self):
     from disco.core import Disco
     return Disco(settings=self.settings)
Example #9
import sys
from disco.core import Disco, result_iterator
from disco.settings import DiscoSettings


def map(line, params):
    for word in line.split():
        yield word, 1


def reduce(iter, params):
    from disco.util import kvgroup
    for word, counts in kvgroup(sorted(iter)):
        yield word, sum(counts)


disco = Disco(DiscoSettings()['DISCO_MASTER'])
print "Starting Disco job.."
print "Go to %s to see status of the job." % disco.master
results = disco.new_job(
    name="wordcount",
    input=["http://discoproject.org/media/text/chekhov.txt"],
    map=map,
    reduce=reduce,
    save=True).wait()
print "Job done. Results:"
for word, count in result_iterator(results):
    print word, count
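The reduce above leans on disco.util.kvgroup, which walks a sorted stream of (key, value) pairs and yields each key once together with an iterator over its values, roughly like this sketch built on itertools.groupby:

from itertools import groupby
from operator import itemgetter

def kvgroup_sketch(sorted_kvs):
    # Illustrative approximation of disco.util.kvgroup.
    for key, group in groupby(sorted_kvs, key=itemgetter(0)):
        yield key, (v for k, v in group)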
Example #10
 def __init__(self, name=None, master=None, worker=None, settings=None):
     from disco.core import Disco
     from disco.settings import DiscoSettings
     self.name = name or type(self).__name__
     self.disco = master if isinstance(master, Disco) else Disco(master)
     self.worker = worker or self.Worker()
     self.settings = settings or DiscoSettings()
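Since the constructor accepts either a Disco instance or a master address, both calls below would produce an equivalent self.disco (the enclosing class is assumed to be Job, and the address is an assumption):

# Hypothetical instantiations of the class above.
job_a = Job(master='disco://localhost')          # address gets wrapped
job_b = Job(master=Disco('disco://localhost'))   # instance reused as-is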
Example #11
def fit_predict(training_data,
                fitting_data,
                tau=1,
                samples_per_job=0,
                save_results=True,
                show=False):
    """
    training_data - training samples.
    fitting_data - dataset to be fitted to the training data.
    tau - controls how quickly the weight of a training sample falls off with
          the distance of its x(i) from the query point x.
    samples_per_job - number of samples processed in a single map/reduce job.
                      If 0, the algorithm calculates it automatically.
    """
    from disco.worker.pipeline.worker import Worker, Stage
    from disco.core import Job, result_iterator
    from disco.core import Disco

    try:
        tau = float(tau)
        if tau <= 0:
            raise Exception("Parameter tau should be > 0.")
    except ValueError:
        raise Exception("Parameter tau should be numerical.")

    if fitting_data.params["id_index"] == -1:
        raise Exception("Predict data should have id_index set.")

    job = Job(worker=Worker(save_results=save_results))
    job.pipeline = [("split",
                     Stage("map",
                           input_chain=fitting_data.params["input_chain"],
                           init=simple_init,
                           process=map_predict))]
    job.params = fitting_data.params
    job.run(name="lwlr_read_data", input=fitting_data.params["data_tag"])

    samples = {}
    results = []
    tau = float(2 * tau**2)  # calculate tau once
    counter = 0

    for test_id, x in result_iterator(job.wait(show=show)):
        if samples_per_job == 0:
            # calculate the number of samples per job
            if len(x) <= 100:  # at most 100 attributes
                samples_per_job = 100  # at most 100 samples per job
            else:
                # more than 100 attributes: shrink the batch size linearly
                samples_per_job = len(x) * -25 / 900. + 53

        samples[test_id] = x
        if counter == samples_per_job:
            results.append(
                _fit_predict(training_data, samples, tau, save_results, show))
            counter = 0
            samples = {}
        counter += 1

    if len(samples) > 0:  # if there are samples left in the dictionary
        results.append(
            _fit_predict(training_data, samples, tau, save_results, show))

    # merge results of every iteration into a single tag
    ddfs = Disco().ddfs
    ddfs.tag(job.name, [[list(ddfs.blobs(tag))[0][0]] for tag in results])

    return ["tag://" + job.name]
Example #12
 def disco(self):
     return Disco(settings=self.settings)
Example #13
from discodex import settings
from discodex.mapreduce import (Indexer, DiscoDBIterator)
from discodex.objects import (DataSet, IChunks, Indices, Index, Results, Dict)

from disco.core import Disco
from disco.ddfs import DDFS
from disco.error import DiscoError
from disco.util import flatten, parse_dir

discodex_settings = settings.DiscodexSettings()
disco_master_url = discodex_settings['DISCODEX_DISCO_MASTER']
disco_prefix = discodex_settings['DISCODEX_DISCO_PREFIX']
index_prefix = discodex_settings['DISCODEX_INDEX_PREFIX']
purge_file = discodex_settings['DISCODEX_PURGE_FILE']
disco_master = Disco(disco_master_url)
ddfs = DDFS(disco_master_url)

NOT_FOUND, OK, ACTIVE, DEAD = 'unknown job', 'ready', 'active', 'dead'


class IndexCollection(Collection):
    allowed_methods = ('GET', 'POST')

    def delegate(self, request, *args, **kwargs):
        name = str(kwargs.pop('name'))
        return IndexResource(name)(request, *args, **kwargs)

    @property
    def names(self):
        return ddfs.list(index_prefix)
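The names property simply lists DDFS tags under the configured index prefix. A standalone equivalent; the master url and prefix value here are assumptions:

from disco.ddfs import DDFS
# Hypothetical standalone call; url and prefix are assumptions.
ddfs = DDFS('disco://localhost')
print ddfs.list('discodex:index:')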
Example #14
	                  2. Online ODAT; 3. Offline dim')
	parser.add_option('--post-fix',
	                  default=1,
	                  help='Do post-fixing for ODAT? (default=1): 1. Yes; 2. No')
	parser.add_option('--go-live',
	                  default=1,
	                  help='Load offline dim data to the DW DBMS? (default=1): 1. Yes; 2. No')
	parser.add_option('--profile',
	                  default=False,
	                  help='Profile (default=False)')
	parser.add_option('--config',
	                  default='conf/config.py',
	                  help='The path to config.py (default=conf/config.py)')

	(options, input_paths) = parser.parse_args()
	master = Disco("disco://" + options.disco_master)

	load_method = odotetlmr
	seq_process = None
	post_fixing = -1
	load_step = int(options.load_step)
	if options.load_method == '2':
		load_method = odatetlmr
		if load_step == 1:
			post_fixing = int(options.post_fix)
			seq_process = multiprocessing.Process(target=seq_server)
			seq_process.start()
	elif options.load_method == '3':
		load_method = offdimetlmr

	input_file_urls = []
Example #15
 def data():
     return Disco(self.master).jobpack(self.jobname)