Example #1
def measure(arg,
            commandline,
            delay,
            maxtime,
            outFile=None,
            errFile=None,
            inFile=None,
            logger=None,
            affinitymask=None):

    r, w = os.pipe()
    forkedPid = os.fork()

    if forkedPid:  # read pickled measurements from the pipe
        os.close(w)
        rPipe = os.fdopen(r)
        r = cPickle.Unpickler(rPipe)
        measurements = r.load()
        rPipe.close()
        os.waitpid(forkedPid, 0)
        return measurements

    else:
        # Sample thread will be destroyed when the forked process _exits
        class Sample(threading.Thread):
            def __init__(self, program):
                threading.Thread.__init__(self)
                self.setDaemon(1)
                self.timedout = False
                self.p = program
                self.maxMem = 0
                self.childpids = None
                self.start()

            def run(self):
                try:
                    remaining = maxtime
                    while remaining > 0:
                        time.sleep(delay)
                        remaining -= delay
                    else:
                        self.timedout = True
                        os.kill(self.p, signal.SIGKILL)
                except OSError, (e, err):
                    if logger: logger.error('%s %s', e, err)

        try:

            m = Record(arg)

            # only write pickles to the pipe
            os.close(r)
            wPipe = os.fdopen(w, 'w')
            w = cPickle.Pickler(wPipe)

            start = time.time()

            # spawn the program in a separate process
            p = Popen(commandline,
                      stdout=outFile,
                      stderr=errFile,
                      stdin=inFile)

            # start a thread to sample the program's resident memory use
            t = Sample(program=p.pid)

            # wait for program exit status and resource usage
            rusage = os.wait3(0)

            elapsed = time.time() - start

            # summarize measurements
            if t.timedout:
                m.setTimedout()
            elif rusage[1] == os.EX_OK:
                m.setOkay()
            else:
                m.setError()

            m.userSysTime = rusage[2][0] + rusage[2][1]
            m.maxMem = t.maxMem
            m.cpuLoad = "%"
            m.elapsed = elapsed

        except KeyboardInterrupt:
            os.kill(p.pid, signal.SIGKILL)

        except ZeroDivisionError, (e, err):
            if logger: logger.warn('%s %s', err, 'too fast to measure?')
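Both this fragment and the fuller variant in Example #15 below break off inside the exception handlers, before the child ever writes anything to the pipe. For the parent's r.load() call to succeed, the child branch presumably ends by pickling the Record into the pipe and calling os._exit so the daemon Sample thread dies with it. A minimal sketch of that missing tail, reusing m, w and wPipe from the try block above:

            # sketch of the child branch's final step (not part of the fragment):
            # hand the measurements back to the parent through the pipe, then
            # exit the forked child without normal interpreter cleanup
            w.dump(m)
            wPipe.close()
            os._exit(0)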
Example #2
                trainTargets[count]         = ecfps[CID]
                count +=1
    
        model.fit(trainImages, trainTargets, batch_size=batch_size, nb_epoch=1)
        
        
        shuffle(testFs)
        count   = 0
        for x in testFs[:chunkSize/10]:
            if x.find(".png") > -1:
                CID     = x[:x.find(".png")]
                image   = io.imread(direct+x,as_grey=True)[10:-10,10:-10]         
                image   = np.where(image > 0.1,1.0,0.0)
                testImages[count,0,:,:]    = image
                testTargets[count]         = ecfps[CID]
                count +=1
        
        preds   = model.predict(testImages)
        RMSE    = np.sqrt(mean_squared_error(testTargets, preds))         
        print "RMSE of epoch: ", RMSE

        
        if DUMP_WEIGHTS:
            dumpWeights(model)

        with open(folder+"wholeModel.pickle", 'wb') as f:
            cp     = cPickle.Pickler(f)
            cp.dump(model)
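Reading the model back is the mirror operation; a minimal sketch, with the caveat that whether a compiled Keras model of this vintage survives a cPickle round-trip is an assumption, not something the fragment shows:

        with open(folder + "wholeModel.pickle", 'rb') as f:
            model = cPickle.Unpickler(f).load()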


Example #3
 def dump(obj, file, protocol=None):
     pickler = pickle.Pickler(file, protocol=PROTOCOL)
     pickler.persistent_id = _function_pickling_handler
     pickler.dump(obj)
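A persistent_id hook on the Pickler needs a matching persistent_load hook on the Unpickler when the stream is read back. The loader below is a hedged counterpart; _function_unpickling_handler is a hypothetical inverse of the handler used above:

 def load(file):
     unpickler = pickle.Unpickler(file)
     # must undo whatever _function_pickling_handler turned into a persistent id
     unpickler.persistent_load = _function_unpickling_handler
     return unpickler.load()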
Example #4
        pass
    else:
        assert 0, "internal error - pickling should fail"
    try:
        p.dump(m)
    except pickle.PicklingError:
        pass
    else:
        assert 0, "internal error - pickling should fail"
    del c, m, fp, p

if 1 and cPickle:
    c = Curl()
    m = CurlMulti()
    fp = StringIO()
    p = cPickle.Pickler(fp, 1)
    try:
        p.dump(c)
    except cPickle.PicklingError:
        pass
    else:
        assert 0, "internal error - pickling should fail"
    try:
        p.dump(m)
    except cPickle.PicklingError:
        pass
    else:
        assert 0, "internal error - pickling should fail"
    del c, m, fp, p

Example #5
 def dumps(self, arg, proto=0):
     p = cPickle.Pickler(proto)
     p.dump(arg)
     return p.getvalue()
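Constructing cPickle.Pickler with just a protocol number (no file) is an old, undocumented convenience of the C module: the pickle is accumulated in an internal buffer and read back with getvalue(), which is what makes this dumps() work. The plain pickle module has no such mode. A tiny usage sketch:

p = cPickle.Pickler(1)          # no file argument: pickle into an internal buffer
p.dump(['spam', 'eggs'])
data = p.getvalue()             # bytes accumulated by the dump() call above
print cPickle.loads(data)       # ['spam', 'eggs']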
Example #6
 def __setitem__(self, key, value):
     f = StringIO.StringIO()
     p = pickle.Pickler(f)
     p.dump(value)
     gimp.set_data(key, f.getvalue())
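The matching read path would hand whatever gimp.get_data returns back to an Unpickler. A hedged sketch of the counterpart __getitem__ (how gimp.get_data reports a missing key is an assumption here):

 def __getitem__(self, key):
     # unpickle the string that __setitem__ above stored under this key
     f = StringIO.StringIO(gimp.get_data(key))
     return pickle.Unpickler(f).load()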
Example #7
def get_requests(files,
                 start=None,
                 end=None,
                 statsfname=None,
                 writestats=None,
                 readstats=None):
    finished = []
    unfinished = {}
    if readstats:
        fp = open(statsfname, 'r')
        u = cPickle.Unpickler(fp)
        requests = u.load()
        fp.close()
        del u
        del fp
    else:
        while 1:
            tup = get_earliest_file_data(files)
            if tup is None:
                break
            code, pid, id, fromepoch, desc = tup
            if start is not None and fromepoch < start:
                continue
            if end is not None and fromepoch > end:
                break
            if code == 'U':
                # restart
                for upid, uid in list(unfinished.keys()):
                    if upid == pid:
                        val = unfinished[(upid, uid)]
                        finished.append(val)
                        del unfinished[(upid, uid)]
                request = StartupRequest()
                request.url = desc
                request.start = fromepoch
                finished.append(request)
                continue
            request = unfinished.get((pid, id))
            if request is None:
                if code != "B":
                    continue  # garbage at beginning of file
                request = Request()
                for pending_req in unfinished.values():
                    pending_req.active = pending_req.active + 1
                unfinished[(pid, id)] = request
            try:
                request.put(code, fromepoch, desc)
            except:
                print "Unable to handle entry: %s %s %s" % (code, fromepoch,
                                                            desc)
            if request.isfinished():
                del unfinished[(pid, id)]
                finished.append(request)

        finished.extend(unfinished.values())
        requests = finished

        if writestats:
            fp = open(statsfname, 'w')
            p = cPickle.Pickler(fp)
            p.dump(requests)
            fp.close()
            del p
            del fp

    return requests
Example #8
        # continue with previous protein
        else:
            sequence += line.strip()
    p.close()
    # pop out first dummy sequence
    sequences.pop(0)

    # matrix representation of position along a sequence
    # where selected k-mers are found, up to m mismatches
    hit_matrix = compile_hit_matrix(sequences, kmers, m)

    # save compiled data
    f = open(
        data_path + virus_family + '_hitmatrix_collapsed_%d_%d.pkl' % (k, m),
        'wb')
    cPickle.Pickler(f, protocol=2).dump(hit_matrix)
    cPickle.Pickler(f, protocol=2).dump(viruses)
    cPickle.Pickler(f, protocol=2).dump(classes)
    f.close()

    # group viruses with similar hosts together
    sort_indices = hit_matrix[:, 0].argsort()
    sort_virus_id = [viruses[i] for i in sort_indices]
    sort_viruses = [classes[v][0] for v in sort_virus_id]

    # plot and save the visualization
    figure = plot_hit_matrix(hit_matrix[sort_indices, :], k, m, kmer_list)
    filename = '%s/fig/%s_protein_kmer_visualization_collapsed_%d_%d.eps' % (
        project_path, virus_family, k, m)
    figure.savefig(filename, dpi=(500), format='eps')
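Each dump above goes through a fresh Pickler on the same file handle, so the file simply contains three independent pickle streams back to back; one Unpickler can read them with consecutive load() calls. A small read-back sketch using the same file name:

f = open(
    data_path + virus_family + '_hitmatrix_collapsed_%d_%d.pkl' % (k, m), 'rb')
u = cPickle.Unpickler(f)
hit_matrix = u.load()   # objects come back in the order they were dumped
viruses = u.load()
classes = u.load()
f.close()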
Example #9
 def pickle(self):
     f = open(self.fname, "w")
     p = cPickle.Pickler(f)
     obj = (self.dbinterfaces, self.dbroutes, self.dbroutelist)
     p.dump(obj)
     f.close()
Example #10
def adaboost(X,
             Y,
             x,
             y,
             predicted_labels,
             test_indices,
             params,
             kmer_dict,
             model='stump',
             virus_family='picorna'):
    """
    Input:
        X : DxN array (Train data) 
        Y : KxN array (Train labels)
        x : Dxn array (Test data)
        y : Kxn array (Test labels)
        predicted_labels : 
        test_indices : 
        params : tuple (fold index, kmer length, mismatch, num of boosting rounds)
        kmer_dict : a dictionary mapping row id
                    to kmers.
        model : string
            can be "tree" or "stump"
        virus_family : 'picorna' / 'rhabdo'
    """

    X = X.astype('float')
    Y = Y.astype('float')
    (D, N) = X.shape
    K = Y.shape[0]
    n = x.shape[1]
    test_indices.sort()

    f = params[0]
    k = params[1]
    m = params[2]
    T = params[3]
    """
    creating output files
    onfname - test/train errors and the selected feature 
             at each round is output in this file
    tnfname - the decision tree after T rounds of boosting
            is output in this file
    dfname - a general dump of the test/train predictions 
        for all T rounds is output in this file
    """
    filetag = model + '_%d_%d_%d' % (k, m, f)
    onfname = project_path + 'cache/%s_temp/%s_error%s.txt' % (
        virus_family, virus_family, filetag)
    tnfname = project_path + 'cache/%s_temp/%s_decision%s.pkl' % (
        virus_family, virus_family, filetag)
    dfname = project_path + 'cache/%s_temp/%s_dump%s.pkl' % (
        virus_family, virus_family, filetag)

    # Initializing weight over examples - Uniform distribution
    w = np.ones(Y.shape, dtype='float') / (N * K)

    #Data structures to store output from boosting at each round.
    #dectree - a list of all nodes (and their attributes) in the decision tree
    #Tpred/tpred - stores the output of the decision tree at each round (train/test samples)
    Phidict = dict()
    phidict = dict()
    dectree = dict()
    order = []
    Tpred = np.zeros((K, N, T + 1), dtype='float32')
    tpred = np.zeros((K, n, T + 1), dtype='float32')
    rocacc = np.zeros((T + 1, 5), dtype='float32')

    starttime = time.time()
    # root decision function/prediction node.
    # root decision function always outputs 1.
    v = ((w * Y).sum(1) > 0) * 2. - 1.
    v = v.reshape(K, 1)
    # compute cumulative weights
    Yv = Y * v
    Wp = (w * (Yv > 0)).sum()
    Wm = (w * (Yv < 0)).sum()
    # a = coefficient of weak rule
    a = 0.5 * np.log((Wp + EPS) / (Wm + EPS))

    if a < 0:
        a = np.abs(a)
        v = -1 * v

    # update decision tree and prediction list.
    Phi = np.ones((1, N), dtype='float32')
    phi = np.ones((1, n), dtype='float32')
    Hweakrule = v * Phi
    hweakrule = v * phi
    # Phidict keys = feature ids
    # Phidict values = [\phi(x), feature wt, >/< decision, weak rule's output]
    Phidict[-1] = [[Phi, a, Hweakrule]]
    phidict[-1] = [[phi, a, hweakrule]]
    dectree[-1] = [-1, [a, [], v]]

    # compute the prediction output by the decision
    # tree for all train/test samples
    train_pred = np.zeros((K, N), dtype='float32')
    test_pred = np.zeros((K, n), dtype='float32')
    for kidx in Phidict.keys():
        for aidx in range(len(Phidict[kidx])):
            train_pred = train_pred + Phidict[kidx][aidx][1] * Phidict[kidx][
                aidx][2]
            test_pred = test_pred + phidict[kidx][aidx][1] * phidict[kidx][
                aidx][2]

    # store the real-valued prediction
    Tpred[:, :, 0] = train_pred
    tpred[:, :, 0] = test_pred

    # compute classification error at round 0
    rocacc[0, 1], rocacc[0, 3] = classification_error(train_pred, test_pred, Y,
                                                      y, 0.)
    duration = time.time() - starttime

    # write some output to file
    # file format: boosting round, k-mer selected,
    # train roc, train error, test roc, test error, time elapsed
    owrite = open(onfname, 'w')
    to_write = [
        -1, 'root', 'None', 0.5, rocacc[0, 1], 0.5, rocacc[0, 3], duration
    ]
    owrite.write('\t'.join(map(str, to_write)) + '\n')
    owrite.close()
    print to_write

    # update weights
    wnew = w * np.exp(-a * Hweakrule * Y)
    wnew = wnew / wnew.sum()
    w = wnew.copy()

    # starting boosting rounds
    for t in range(T):
        starttime = time.time()

        # choose the appropriate (leaf+weak rule) for the next prediction function
        #pstar, cstar, pastar, cvalue, Z = py_get_weak_rule(X, Y, Phidict, w, model)
        pstar, cstar, pastar, cvalue, Z = weave_get_weak_rule(
            X, Y, Phidict, w, model)
        PX = (X[cstar:cstar + 1, :] < cvalue) * 1
        px = (x[cstar:cstar + 1, :] < cvalue) * 1
        order.append(t)

        # Updating Tree and prediction dictionary
        Phidict[t] = []
        phidict[t] = []
        dectree[t] = [[kmer_dict[cstar], cvalue]]
        dectree[pstar][pastar + 1][1].append(t)
        Hweakrule = np.zeros((K, N), dtype='float')
        hweakrule = np.zeros((K, n), dtype='float')
        ans = [0, 1]

        for aidx in ans:
            # compute output of decision function
            Phi = Phidict[pstar][pastar][0] * (aidx + ((-1.)**aidx) * PX)
            phi = phidict[pstar][pastar][0] * (aidx + ((-1.)**aidx) * px)
            # calculate optimal value of alpha for that decision
            wYP = w * Y * Phi
            vstar = ((wYP.sum(1) > 0) * 2. - 1.).reshape(K, 1)
            YvP = Y * vstar * Phi
            Wp = (w * (YvP == 1)).sum()
            Wm = (w * (YvP == -1)).sum()

            a = 0.5 * np.log((Wp + EPS) / (Wm + EPS))
            if a < 0:
                a = np.abs(a)
                vstar = -1 * vstar

            # compute f(x) = \alpha * \phi(x) * v for each decision node
            Hweakrule += a * vstar * Phi

            # Update Tree and prediction dictionary
            Phidict[t].append([Phi, a, vstar * Phi])
            phidict[t].append([phi, a, vstar * phi])
            dectree[t].append([a, [], vstar])

        # Update example weights
        wnew = w * np.exp(-1. * Hweakrule * Y)
        wnew = wnew / wnew.sum()
        w = wnew.copy()

        # Calculate train and test predictions and errors
        train_pred = np.zeros((K, N), dtype='float32')
        test_pred = np.zeros((K, n), dtype='float32')
        for kidx in Phidict.keys():
            for aidx in range(len(Phidict[kidx])):
                train_pred = train_pred + Phidict[kidx][aidx][1] * Phidict[
                    kidx][aidx][2]
                test_pred = test_pred + phidict[kidx][aidx][1] * phidict[kidx][
                    aidx][2]

        Tpred[:, :, t + 1] = train_pred
        tpred[:, :, t + 1] = test_pred
        rocacc[t + 1,
               0], rocacc[t + 1,
                          2], rocacc[t + 1,
                                     4] = roc_auc(train_pred, test_pred, Y, y)
        rocacc[t + 1,
               1], rocacc[t + 1,
                          3] = classification_error(train_pred, test_pred, Y,
                                                    y, rocacc[t + 1, 4])
        predicted_labels[test_indices, t] = test_pred.argmax(0)
        duration = time.time() - starttime

        # output data
        owrite = open(onfname, 'a')
        to_write = [
            t, kmer_dict[cstar], cvalue, rocacc[t + 1, 0], rocacc[t + 1, 1],
            rocacc[t + 1, 2], rocacc[t + 1, 3], duration
        ]
        owrite.write('\t'.join(map(str, to_write)) + '\n')
        owrite.close()
        print to_write

    # output decision tree
    twrite = open(tnfname, 'wb')
    cPickle.Pickler(twrite, protocol=2).dump(dectree)
    cPickle.Pickler(twrite, protocol=2).dump(order)
    twrite.close()

    # dump predictions for more analysis
    dwrite = open(dfname, 'wb')
    cPickle.Pickler(dwrite, protocol=2).dump(Tpred)
    cPickle.Pickler(dwrite, protocol=2).dump(tpred)
    cPickle.Pickler(dwrite, protocol=2).dump(rocacc)
    dwrite.close()

    return predicted_labels
Example #11
 def __init__(self):
     self.file = tempfile.TemporaryFile(suffix=".log")
     self.pickler = cPickle.Pickler(self.file, 1)
     self.pickler.fast = 1
     self.stores = 0
     self.read = 0
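Setting pickler.fast = 1 disables the memo, so every record written to this temporary log is a self-contained pickle (at the cost of re-serializing shared objects and failing on recursive ones). Replaying the log is then just a matter of rewinding and loading until EOF; a hedged sketch of a replay helper that is not part of the fragment:

 def replay(self):
     # rewind the temporary log and yield every pickled record in order
     self.file.seek(0)
     unpickler = cPickle.Unpickler(self.file)
     while 1:
         try:
             yield unpickler.load()
         except EOFError:
             break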
Example #12
        def loadDataMV (self, filename, verbose=True, replace_missing=True):
                ''' Get the data from a text file in one of 3 formats: matrix, sparse, sparse_binary'''
#                data.MV=[]
#                data.X=[]
                if verbose:  print("========= Reading " + filename)
                ntime = nnum = ncat = nmvc = 0
                start = time.time()
                # find the type of features for the data set
                dictfeats=self.feat_type
                usetime = np.array(np.where(self.feat_type=='Time'))[0]
                usenum = np.array(np.where(self.feat_type=='Numerical'))[0]
                usecat = np.array(np.where(self.feat_type=='Categorical'))[0]
                usemulticat = np.array(np.where(self.feat_type=='Multi-value'))[0]
                if verbose:
                        print("=== Detected %d Numerical Features" % len(usenum))
                        print("=== Detected %d Categorical Features" % len(usecat))
                        print("=== Detected %d Multi-valued Categorical Features" % len(usemulticat))
                        print("=== Detected %d Time Features" % len(usetime))
                # artificial headers for features
                for i in range(len(dictfeats)):
                        dictfeats[i]=str(i)
                # read the first column to identify the total number of features
                df = pd.read_csv(filename,header=None, names=dictfeats,delim_whitespace=True, usecols=[0], parse_dates=True, na_values='NaN')

                n_samples=len(df.index)

                if verbose: print("=== %d Samples will be loaded " % len(df.index))
                concadat= np.zeros((n_samples, 1))
                del df

                # Check the available types of features
                if verbose: print("========================")
                """
                if usetime != []:
                        if verbose: print("=== Processing %d Time features " % len(usetime))
                        try:
                                dftime = pd.read_csv(filename,	header=None, names=self.feat_type[usetime], usecols=usetime, delim_whitespace=True,parse_dates=True, na_values='NaN')
                                ddt=np.array(dftime)
                                ntime=ddt.shape[1]
                                concadat= np.concatenate((concadat,ddt),axis=1)
                                del dftime
                                del ddt
                        except:	print ("Failed to load time variables")
                """

                if len(usenum) > 0:
                        if verbose: print("=== Processing %d Numerical features " % len(usenum))
                        try:
                                dfnum = pd.read_csv(filename, 	header=None, names=self.feat_type[usenum], usecols=usenum, delim_whitespace=True, na_values='NaN')
                                dd=np.array(dfnum)
                                nnum=dd.shape[1]
                                concadat= np.concatenate((concadat,dd),axis=1)
                                del dfnum
                                del dd
                        except:	print ("Failed to load numerical variables")

                if len(usecat) > 0: # categorical features will be loaded as numbers for efficiency
                        if verbose: print("=== Processing %d Categorical features " % len(usecat))
                        try:
                                dfcat = pd.read_csv(filename, 	header=None, names=self.feat_type[usecat], usecols=usecat,dtype=object, delim_whitespace=True, na_values='NaN')
                                ncat=dfcat.shape[1]
                                CAT=dfcat

                                # Treat categorical variables as integers or perform hash encoding (one hot encoding is far more expensive)
#                                catnumeric_dataset=np.array(dfcat)
                                #print("Tipo catego")
                                #print (catnumeric_dataset.dtype)
                                #enca = OrdinalEncoder().fit(dfcat)
                                #catnumeric_dataset = enca.transform(dfcat)
                                #catnumeric_dataset = np.array(catnumeric_dataset)

#                                ncat = catnumeric_dataset.shape[1]
#                                concadat= np.concatenate((concadat,catnumeric_dataset),axis=1)
#                                print (catnumeric_dataset)
#                                #np.savetxt('categ.csv',catnumeric_dataset,delimiter=',')
                                del dfcat
#                                del catnumeric_dataset
                        except:
                                print ("Failed to load Categorical variables")
                                CAT=[]
                else:
                        CAT=[]

                """
                if len(usemulticat) > 0:
                        if verbose: print("=== Processing %d Multi Valued Categorical features " % len(usemulticat))
                        try:
                                dfmvc = pd.read_csv(filename, 	header=None, names=self.feat_type[usemulticat], usecols=usemulticat,  dtype=object, delim_whitespace=True, na_values='NaN')
                                nmvc = dfmvc.shape[1]
                                MV=dfmvc
                                del dfmvc
                        except:
                                print ("Failed to load Multi-Valued Categorical variables")
                                MV=[]
                else:
                        MV=[]
                """
                ntime = 0
                nmvc = 0
                MV = []

                concadat=np.delete(concadat, 0, 1)
                self.info['loaded_feat_types'] = [ntime, nnum, ncat, nmvc]


                if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")):
                        with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
                                vprint (verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
                                return pickle.load(pickle_file)
                if 'format' not in self.info.keys():
                        self.getFormatData(filename)
                        print("not in self")
                if 'feat_num' not in self.info.keys():
                        self.getNbrFeatures(filename)

                dataX = concadat.astype(np.float64).copy(order='C')

                # IMPORTANT: when we replace missing values we double the number of variables

                if self.info['format']=='dense' and replace_missing and np.any(map(np.isnan,dataX)):
                        vprint (verbose, "Replace missing values by 0 (slow, sorry)")
                        dataX = data_converter.replace_missing(dataX)
                if self.use_pickle:
                        with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
                                vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
                                p = pickle.Pickler(pickle_file)
                                p.fast = True
                                p.dump(dataX)
                end = time.time()
                if verbose:
                        print( "Loaded %d Samples and %d Features" % (dataX.shape[0], dataX.shape[1]))
                        print( "[+] Success in %5.2f sec" % (end - start))

                data={}
                data['numerical']=dataX
                data['MV']=MV;
                data['CAT']=CAT;
                return data
Example #13

class People(object):
    """Klasa do piklowania."""
    def __init__(self, imie, nazwisko):
        self.imie = imie
        self.nazwisko = nazwisko

    def __str__(self):
        return "%s %s" % (self.imie, self.nazwisko)


p1 = People('Adam', 'Zazol')

print "Przed piklowaniem: %s" % p1

f = open("dane.dat", "wb")  # z trybem binarnym dla windows
p = cPickle.Pickler(f, 2)  # "piklowanie" w trybie binarnym
p.dump(p1)  # "zapiklowanie" do pliku
f.close()  # zamkniecie pliku

del p1  # to dla niedowiarkow...

f = open("dane.dat", "rb")  # znow uklon w strone windows
u = cPickle.Unpickler(f)  # sam rozpozna ze tryb binarny
p1 = u.load()  # tu ladujemy obiekt

print "Po odtworzeniu z pikla: %s" % p1

f.close()  # dobre maniery mowia o zamykaniu...
Example #14
def write_metrics(metrics, filename=None, filetype='json'):
    """
    Write metrics to file after running self.run_metrics()

    Input:
    ------
    metrics : dictionary
        Omnical_Metrics.run_metrics() output

    filename : str, default=None
        filename to write out, will use filename by default

    filetype : str, default='json', option=['json', 'pkl']
        specify file format of output metrics file
    """
    # get pols
    pols = list(metrics.keys())

    if filename is None:
        filename = os.path.join(metrics[pols[0]]['filedir'],
                                metrics[pols[0]]['filestem'] + '.omni_metrics')

    # write to file
    if filetype == 'json':
        if filename.split('.')[-1] != 'json':
            filename += '.json'

        # change ndarrays to lists
        metrics_out = copy.deepcopy(metrics)
        # loop over pols
        for h, pol in enumerate(metrics_out.keys()):
            # loop over keys
            for i, k in enumerate(metrics_out[pol].keys()):
                if isinstance(metrics_out[pol][k], np.ndarray):
                    metrics_out[pol][k] = metrics[pol][k].tolist()
                elif isinstance(metrics_out[pol][k], (dict, odict)):
                    if list(metrics_out[pol]
                            [k].values())[0].dtype == np.complex:
                        metrics_out[pol][k] = odict([
                            (j, metrics_out[pol][k][j].astype(np.str))
                            for j in metrics_out[pol][k]
                        ])
                    metrics_out[pol][k] = odict([
                        (str(j), metrics_out[pol][k][j].tolist())
                        for j in metrics_out[pol][k]
                    ])
                elif isinstance(metrics_out[pol][k], (np.bool, np.bool_)):
                    metrics_out[pol][k] = bool(metrics_out[pol][k])
                elif isinstance(metrics_out[pol][k], np.float):
                    metrics_out[pol][k] = float(metrics_out[pol][k])
                elif isinstance(metrics_out[pol][k], np.integer):
                    metrics_out[pol][k] = int(metrics_out[pol][k])

        with open(filename, 'w') as f:
            json.dump(metrics_out, f, indent=4)

    elif filetype == 'pkl':
        if filename.split('.')[-1] != 'pkl':
            filename += '.pkl'
        with open(filename, 'wb') as f:
            outp = pkl.Pickler(f)
            outp.dump(metrics)
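A matching reader just reverses the two branches above: json.load for the .json flavour and an Unpickler for the .pkl flavour. A minimal hedged sketch (load_metrics is not part of the module shown):

def load_metrics(filename):
    # counterpart to write_metrics(); returns the metrics dictionary
    if filename.endswith('.json'):
        with open(filename, 'r') as f:
            return json.load(f)
    elif filename.endswith('.pkl'):
        with open(filename, 'rb') as f:
            return pkl.Unpickler(f).load()
    else:
        raise ValueError("unrecognized metrics file: %s" % filename)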
Example #15
def measure(arg,commandline,delay,maxtime,
      outFile=None,errFile=None,inFile=None,logger=None,affinitymask=None):

   r,w = os.pipe()
   forkedPid = os.fork()

   if forkedPid: # read pickled measurements from the pipe
      os.close(w); rPipe = os.fdopen(r); r = cPickle.Unpickler(rPipe)
      measurements = r.load()
      rPipe.close()
      os.waitpid(forkedPid,0)
      return measurements

   else: 
      # Sample thread will be destroyed when the forked process _exits
      class Sample(threading.Thread):

         def __init__(self,program):
            threading.Thread.__init__(self)
            self.setDaemon(1)
            self.timedout = False 
            self.p = program
            self.maxMem = 0
            self.childpids = None   
            self.start() 
 
         def run(self):
            try:              
               remaining = maxtime               
               while remaining > 0: 
                  mem = gtop.proc_mem(self.p).resident                                   
                  time.sleep(delay)                    
                  remaining -= delay
                  # race condition - will child processes have been created yet?
                  self.maxMem = max((mem + self.childmem())/1024, self.maxMem)  
               else:
                  self.timedout = True
                  os.kill(self.p, signal.SIGKILL) 
            except OSError, (e,err):
               if logger: logger.error('%s %s',e,err)

         def childmem(self):
            if self.childpids == None:
               self.childpids = set()
               for each in gtop.proclist():
                  if gtop.proc_uid(each).ppid == self.p:
                     self.childpids.add(each)
            mem = 0
            for each in self.childpids:
               mem += gtop.proc_mem(each).resident
            return mem

       
      try:

         m = Record(arg)

         # only write pickles to the pipe
         os.close(r); wPipe = os.fdopen(w, 'w'); w = cPickle.Pickler(wPipe)

         # gtop cpu is since machine boot, so we need a before measurement
         cpus0 = gtop.cpu().cpus 
         start = time.time()

         # spawn the program in a separate process
         p = Popen(commandline,stdout=outFile,stderr=errFile,stdin=inFile)
         
         # start a thread to sample the program's resident memory use
         t = Sample( program = p.pid )

         # wait for program exit status and resource usage
         rusage = os.wait3(0)

         # gtop cpu is since machine boot, so we need an after measurement
         elapsed = time.time() - start
         cpus1 = gtop.cpu().cpus 

         # summarize measurements 
         if t.timedout:
            m.setTimedout()
         elif rusage[1] == os.EX_OK:
            m.setOkay()
         else:
            m.setError()

         m.userSysTime = rusage[2][0] + rusage[2][1]
         m.maxMem = t.maxMem

         load = map( 
            lambda t0,t1: 
               int(round( 
                  100.0 * (1.0 - float(t1.idle-t0.idle)/(t1.total-t0.total))
               ))
            ,cpus0 ,cpus1 )

         #load.sort(reverse=1) # maybe more obvious unsorted
         m.cpuLoad = ("% ".join([str(i) for i in load]))+"%"

         m.elapsed = elapsed


      except KeyboardInterrupt:
         os.kill(p.pid, signal.SIGKILL)

      except ZeroDivisionError, (e,err): 
         if logger: logger.warn('%s %s',err,'too fast to measure?')
Example #16
pipeliner = pipeline.Pipeliner(extractor, preprocessor, dictTrainer, encoder,
                               pooler)

if args.encoder is None:
    mpiutils.rootprint('training...')
    pipeliner.train(cifar, 400000)
mpiutils.rootprint('Processing data...')
# save the labels and the pipeliner first
if mpiutils.rank == 0:
    print 'tr label size: {}'.format(cifar.label_tr.shape)
    print 'te label size: {}'.format(cifar.label_te.shape)
    io.savemat(outputfolder + '/tr_label.mat', {'label': cifar.label_tr},
               oned_as='row')
    io.savemat(outputfolder + '/te_label.mat', {'label': cifar.label_te},
               oned_as='row')
    cPickle.Pickler(open(outputfolder + '/extractor.dat', 'w')).dump(extractor)
    cPickle.Pickler(open(outputfolder + '/preprocessor.dat',
                         'w')).dump(preprocessor)
    cPickle.Pickler(open(outputfolder + '/dictTrainer.dat',
                         'w')).dump(dictTrainer)
    cPickle.Pickler(open(outputfolder + '/encoder.dat', 'w')).dump(encoder)
    cPickle.Pickler(open(outputfolder + '/pooler.dat', 'w')).dump(pooler)

# process the data
pipeliner.batch_process_dataset(cifar, 1000,
                                outputfolder + '/cifar_tr_{}_{}.mat')
pipeliner.batch_process_dataset(cifar,
                                1000,
                                outputfolder + '/cifar_te_{}_{}.mat',
                                fromTraining=False)
mpiutils.safebarrier()
Example #17
    def goDoIt(self,
               inputSeq,
               commonData,
               targetFunction,
               binplace=True,
               alternateSource=None,
               recursive=False,
               quiet=False,
               label="",
               email=""):
        """
        Executes a function on the Golem cluster indicated by the settings for this object.
        Parameters:
            inputSeq - inputs to the function to run. This will be desequenced and run in a batch of method calls, one
                       call per item. Objects in the input must be possible and meaningful to pickle, and
                       unpickle on a different machine.
            commonData - input that should be provided to every invocation of the function. Must be possible and
                         meaningful to pickle, then unpickle on a different machine.
            targetFunction - Function to execute. Must have prototype func(item, item), by any name, where the
                             first item is something off the inputSeq sequence and the second is the commonData.
                             It must return its result, as only these return values will be pickled and sent back;
                             changes to global variables are not captured or returned.
            binplace - Should the script containing the target function be copied to the path that the input
                       data is being copied to? If so, that one file must be the ONLY non-library file required
                       for the function to work properly, since files around it won't be known about. This
                       permits development out of a directory that isn't world-readable. If this is set to something
                       that evaluates to false, the file will be used in place, and must be visible to the
                       workers by the same path it is visible to the client. Default: True.
            alternateSource - Path to the file to use as the source for the script containing the targetFunction.
                              This should be blank in almost all instances, since this function will determine this
                              data reflectively in that case and that is much more likely to be correct. Use this
                              if, for some reason, the file detected by inspect.getabsfile is wrong, or if
                              a different path is simply required to access the data on the workers.
                              If the binPlace flag is set, this is the file that is copied, if it is not None (if it is,
                              then the detected file is copied). Default: None.
            recursive - Deprecated.
            quiet - Suppress server responses from being printed to stdout while waiting for results. Default: False,
                    for backwards compatibility. "True" is more likely to be desirable.
            label - Alternate identifier for locating the job in the log later. Optional.
            email - Informational field to identify the person running the job in case they need to be contacted. Optional.
        """
        if len(sys.argv) > 1 and sys.argv[1] == "--golemtask":
            # uh-oh
            raise InfiniteRecursionError(
                "goDoIt called from something that was already a Golem task, "
                +
                "without the 'recursive' flag indicating that this is intentional."
                +
                "Make sure to test for __name__ == '__main__' in your main program,"
                +
                "or it will try to execute in its entirety when Golemizer tries to import."
            )

        restoreThisCwdOrPeopleWillHateMePassionately = os.getcwd()
        loud = not quiet
        try:
            outName = str(uuid.uuid1())
            os.chdir(self.pickleInputShare)
            os.mkdir(outName)  # insecure: mode 0777
            os.chdir(outName)
            picklePath = os.getcwd()
            pickleCount = 0
            nextList = []
            n = 0
            localLimit = self.taskSize

            for parameter in inputSeq:
                nextList.append(parameter)
                n += 1
                if n >= localLimit:
                    self._spill(nextList, pickleCount)
                    nextList = []
                    n = 0
                    pickleCount += 1
            if nextList:
                self._spill(nextList, pickleCount)
                pickleCount += 1

            if not alternateSource:
                # restore original path or getabsfile doesn't work correctly as
                # of 2.7
                os.chdir(restoreThisCwdOrPeopleWillHateMePassionately)
                target = inspect.getabsfile(targetFunction)
                os.chdir(self.pickleInputShare)
                os.chdir(outName)
                # print "===> Original file:", target
            else:
                target = alternateSource

            if binplace:
                # print "===> Original file:", target
                newTarget = os.path.join(picklePath, os.path.basename(target))
                # print "===> New file:", newTarget
                shutil.copy2(target, newTarget)
                target = newTarget

            time.sleep(2)
            commonFile = open("common.pkl", "wb")
            commonObjectPickler = cPickle.Pickler(commonFile, 2)
            commonObjectPickler.dump(commonData)
            commonObjectPickler.dump(targetFunction)
            commonFile.flush()
            commonFile.close()

            runlist = [
                {
                    "Count":
                    1,
                    "Args": [
                        self.pyPath,
                        self.thisLibraryPath,
                        "--golemtask",
                        os.path.join(picklePath, "common.pkl"),
                        # we are making certain filename
                        # assumptions on the client side
                        os.path.join(picklePath,
                                     str(n) + ".pkl"),
                        self.jobOutputPath,
                        target
                    ]
                } for n in range(0, pickleCount)
            ]

            response, content = golem.runBatch(runlist, self.serverPass,
                                               self.masterPath, loud, label,
                                               email)
            jobId = golemBlocking.jobIdFromResponse(content)
            finalStatus = golemBlocking.stall(jobId, self.masterPath, loud)
            if loud and (finalStatus["Status"] != "SUCCESS"):
                print "Uh-oh- job status is", finalStatus[
                    "Status"], "and we're probably going to crash soon"

            # Note: We're choosing to ignore stdout/stderr. We can revisit this design decision later and decide to
            # do something instead, if we really desperately want to

            # resultPathGenerator = (os.path.abspath(
            #    os.path.join(
            #        self.golemOutPath, "golem_" + x + os.sep, self.jobOutputPath,
            #    )
            #) for x in self.golemIds)

            golemDirPattern = re.compile("golem_\\d+")

            resultPathGenerator = (os.path.abspath(
                os.path.join(self.golemOutPath, foo))
                                   for foo in os.listdir(self.golemOutPath)
                                   if golemDirPattern.match(foo))

            resultFilesNumbered = []

            filenamePattern = re.compile(
                "^{0}_(\\d+)\\.out\\.pkl$".format(jobId))

            # because we're already performing the match,
            # decorate-sort-undecorate is the best sort strategy here
            for resultPath in resultPathGenerator:
                # print "==>", resultPath
                for file in os.listdir(resultPath):
                    match = filenamePattern.match(file)
                    if match:
                        # print "====>", file
                        resultFilesNumbered.append(
                            (int(match.group(1)),
                             os.path.join(resultPath, file)))

            if len(resultFilesNumbered) != pickleCount:
                raise ExecutionFailure(
                    "Unknown error prevented {0} of {1} task bundles from completing."
                    .format(pickleCount - len(resultFilesNumbered),
                            pickleCount))
            resultFilesNumbered.sort()

            return _unpickleSequence((pair[1] for pair in resultFilesNumbered))
        finally:
            os.chdir(restoreThisCwdOrPeopleWillHateMePassionately)
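_spill is called above but not shown. Given the filename assumption the run list makes (str(n) + ".pkl" inside picklePath, which is the working directory at the time of the call), it presumably pickles one batch of inputs per numbered file; a hedged sketch:

    def _spill(self, taskList, pickleCount):
        # sketch only: write one numbered batch of task inputs next to common.pkl,
        # matching the "<n>.pkl" names assumed when the run list is built
        batchFile = open(str(pickleCount) + ".pkl", "wb")
        cPickle.Pickler(batchFile, 2).dump(taskList)
        batchFile.close()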
Example #18
# just pull out ten features from X to make sure the whole thing works
X = X[:10,:]

v = z.values()
split_indices = splitdata.cv_multiclass_fold(Y,10)

labels = np.argmax(Y,axis=0)
labelled_featuresets = [(dict(zip(v,data)),y) for (data,y) in zip(X.T,labels)]

test_labels=[]
true_labels=[]
for i,train_indices in enumerate(split_indices):
    test_indices = list(set(range(Y.shape[1])).difference(train_indices))
    # train
    train_features = [labelled_featuresets[i] for i in train_indices]
    model = nb.NaiveBayesClassifier.train(train_features)
    # test
    test_features = [labelled_featuresets[i] for i in test_indices]
    label = [model.classify(featureset[0]) for featureset in test_features]
    # collect
    true_labels.append(Y[:,test_indices])
    test_labels.append(label)
# save
fh = open('/proj/ar2384/picorna/labels.pkl','wb')
labels = {
    "true":true_labels,
    "test":test_labels
}
cPickle.Pickler(fh,protocol=2).dump(labels)

Example #19
    def __init__(self,
                 storage,
                 pool_size=7,
                 pool_timeout=1 << 31,
                 cache_size=400,
                 cache_size_bytes=0,
                 historical_pool_size=3,
                 historical_cache_size=1000,
                 historical_cache_size_bytes=0,
                 historical_timeout=300,
                 database_name='unnamed',
                 databases=None,
                 xrefs=True,
                 large_record_size=1 << 24,
                 **storage_args):
        """Create an object database.

        :Parameters:
          - `storage`: the storage used by the database, e.g. FileStorage
          - `pool_size`: expected maximum number of open connections
          - `cache_size`: target size of Connection object cache
          - `cache_size_bytes`: target size measured in total estimated size
               of objects in the Connection object cache.
               "0" means unlimited.
          - `historical_pool_size`: expected maximum number of total
            historical connections
          - `historical_cache_size`: target size of Connection object cache for
            historical (`at` or `before`) connections
          - `historical_cache_size_bytes` -- similar to `cache_size_bytes` for
            the historical connection.
          - `historical_timeout`: minimum number of seconds that
            an unused historical connection will be kept, or None.
          - `xrefs` - Boolean flag indicating whether implicit cross-database
            references are allowed
        """
        if isinstance(storage, basestring):
            from ZODB import FileStorage
            storage = ZODB.FileStorage.FileStorage(storage, **storage_args)
        elif storage is None:
            from ZODB import MappingStorage
            storage = ZODB.MappingStorage.MappingStorage(**storage_args)

        # Allocate lock.
        x = threading.RLock()
        self._a = x.acquire
        self._r = x.release

        # pools and cache sizes
        self.pool = ConnectionPool(pool_size, pool_timeout)
        self.historical_pool = KeyedConnectionPool(historical_pool_size,
                                                   historical_timeout)
        self._cache_size = cache_size
        self._cache_size_bytes = cache_size_bytes
        self._historical_cache_size = historical_cache_size
        self._historical_cache_size_bytes = historical_cache_size_bytes

        # Setup storage
        self.storage = storage
        self.references = ZODB.serialize.referencesf
        try:
            storage.registerDB(self)
        except TypeError:
            storage.registerDB(self, None)  # Backward compat

        if (not hasattr(storage, 'tpc_vote')) and not storage.isReadOnly():
            warnings.warn(
                "Storage doesn't have a tpc_vote and this violates "
                "the storage API. Violently monkeypatching in a do-nothing "
                "tpc_vote.", DeprecationWarning, 2)
            storage.tpc_vote = lambda *args: None

        if IMVCCStorage.providedBy(storage):
            temp_storage = storage.new_instance()
        else:
            temp_storage = storage
        try:
            try:
                temp_storage.load(z64, '')
            except KeyError:
                # Create the database's root in the storage if it doesn't exist
                from persistent.mapping import PersistentMapping
                root = PersistentMapping()
                # Manually create a pickle for the root to put in the storage.
                # The pickle must be in the special ZODB format.
                file = cStringIO.StringIO()
                p = cPickle.Pickler(file, 1)
                p.dump((root.__class__, None))
                p.dump(root.__getstate__())
                t = transaction.Transaction()
                t.description = 'initial database creation'
                temp_storage.tpc_begin(t)
                temp_storage.store(z64, None, file.getvalue(), '', t)
                temp_storage.tpc_vote(t)
                temp_storage.tpc_finish(t)
        finally:
            if IMVCCStorage.providedBy(temp_storage):
                temp_storage.release()

        # Multi-database setup.
        if databases is None:
            databases = {}
        self.databases = databases
        self.database_name = database_name
        if database_name in databases:
            raise ValueError("database_name %r already in databases" %
                             database_name)
        databases[database_name] = self
        self.xrefs = xrefs

        self.large_record_size = large_record_size
Example #20
    dataPath = sys.argv[1]
    if dataPath.endswith(".pickle"):
        ## load from pickle
        crags = cragsFromPickle(dataPath)

        for crag in crags:
            print("{name}".format(**crag))
            for route in crag['route']:
                print(
                    "    {name} ({grade}): {soft} soft, {fair} fair, {hard} hard / {total}\n        Fairness: {fairness}\n"
                    .format(**route))

    elif dataPath.endswith(".json"):
        ## load from json
        crags = cragsFromJson(dataPath)

        # pickle it, now that we've got it
        pickleName = 'pickles/{}.pickle'.format(
            os.path.basename(dataPath).split('.')[0])
        with open(pickleName, 'w') as f:
            print_err("Pickling to {}...".format(pickleName))
            pickler = cPickle.Pickler(f)
            pickler.dump(crags)
        jsonName = 'json/{}.json'.format(
            os.path.basename(dataPath).split('.')[0])
        with open(jsonName, 'w') as f:
            f.write(json.dumps(crags))
    else:
        print_err("Unknown filetype for " + dataPath)
        sys.exit(1)
Example #21
        dictTagIndex[t] = []

    for sample in tmp_reader:
        sid = sample[0]
        title = sample[1].split()
        body = sample[2].split()
        tags = [t for t in sample[3].split() if t in tagList]

        if (len(tags) == 0):
            continue

        dictMessages[sid] = {
            'tags': tags,
            'title': Counter(title),
            'body': Counter(body)
        }
        dictTagCounts.update(tags)

        for t in tags:
            dictTagIndex[t].append(sid)

    fd.close()

    # Write dictionaries out in pickle file
    outstream = open(args.dictFile, 'wb')
    writer = pickle.Pickler(outstream, pickle.HIGHEST_PROTOCOL)
    writer.dump(dictMessages)
    writer.dump(dictTagCounts)
    writer.dump(dictTagIndex)
    outstream.close()
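Unlike the hit-matrix example earlier (where each dump used a fresh Pickler), a single Pickler is reused for the three dump() calls here, so its memo persists across them and objects shared between the dictionaries may be written as back-references into an earlier stream. Reading the file back should therefore also use one Unpickler, with load() called three times in the same order; a short sketch:

    instream = open(args.dictFile, 'rb')
    reader = pickle.Unpickler(instream)
    dictMessages = reader.load()    # same order as the dump() calls above
    dictTagCounts = reader.load()
    dictTagIndex = reader.load()
    instream.close()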
Example #22
sys.path.insert(0,parentdir) 

import cPickle
import copy
import record
import json
from collections import defaultdict

assert len(sys.argv) == 4, 'Usage: %s records.pickle photos.json photos.pickle' % sys.argv[0]
_, in_pickle, photos_json, out_pickle = sys.argv

rs = record.AllRecords(in_pickle)
expansions = json.load(file(photos_json))

f = file(out_pickle, "w")
p = cPickle.Pickler(f, 2)

skipped = 0
num_images, num_photos = 0, 0

for idx, r in enumerate(rs):
  digital_id = r.photo_id()
  image_file = '%s.jpg' % digital_id
  if image_file not in expansions:
    # XXX: why skip any images?
    skipped += 1
    continue
  
  num_images += 1

  if len(expansions[image_file]) == 0:
Example #23
 def dumps(self, arg, proto=0):
     f = StringIO()
     p = cPickle.Pickler(f, proto)
     p.dump(arg)
     f.seek(0)
     return f.read()
Example #24
 def _hash(s, o):
     f = StringIO.StringIO()
     p = pickle.Pickler(f, -1)
     p.persistent_id = s._filter
     p.dump(o); f.seek(0)
     return f.read()
Example #25
#########################################################
# File: build_location_dict.py                          #
# Created: June 09, 2013                                #
# Modified: June 09, 2013                               #
# Author: Bogdan State                                  #
# Description: Constructs dictionary mapping            #
#   user to location                                    #
#########################################################

import csv
import cPickle as pickle

FILE_LOCATION = "/media/bogdan/61ec6432-da13-415d-9afd-fd46e933f48b/twitter/"
TXT_FILE_NAME = "location_iso2c.txt"
PKL_FILE_NAME = "location_iso2c.pkl"

location_dict = {}
with open(FILE_LOCATION + TXT_FILE_NAME, 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    for row in reader:
        location_dict[row[0]] = row[1]

output = pickle.Pickler(open(FILE_LOCATION + PKL_FILE_NAME, 'wb'))
output.fast = True
output.dump(location_dict)
Example #26
 def __init__(self, file, protocol=0):
     pickler = pickle.Pickler(file, protocol)
     pickler.persistent_id = self.persistent_id
     self.dump = pickler.dump
     self.clear_memo = pickler.clear_memo
Example #27
    def accept(self):
        self.log_info("checking connection protocol", 2)
        if not hasattr(self, 'unpickler'):
            stream = ScarabBufferedFile(self.stream)
            # allow exceptions to bubble up
            # FIXME should mark connection as invalid
            # FIXME could block on stream having less than 100 bytes
            head = stream.head(4)
            if head[0:4] == ("\x89" + "CBF"):
                import LDOBinary
                self.unpickler = LDOBinary.LDOBinaryUnmarshaler(stream)
                self.pickler = LDOBinary.LDOBinaryMarshaler(stream)
                self.protocol = 'ldobinary'
            elif head[0:2] == '<?':
                head = stream.head(100)
                if string.find(head, 'urn:schemas-xmlsoap-org:soap.v1') != -1:
                    import SOAP
                    self.unpickler = SOAP.SOAPUnmarshaler(stream)
                    self.pickler = SOAP.SOAPMarshaler(stream)
                    self.protocol = 'soap'
                else:
                    import LDOXML
                    self.unpickler = LDOXML.LDOXMLUnmarshaler(stream)
                    self.pickler = LDOXML.LDOXMLMarshaler(stream)
                    self.protocol = "ldoxml"
            elif head[0:1] != ' ' and head[0:1] != "\t" and head[0:1] != '<':
                import cPickle
                self.unpickler = cPickle.Unpickler(stream)
                self.pickler = cPickle.Pickler(stream)
                self.protocol = "pickle"
            else:
                self.log_info("accept: unrecognized serialization", 2)
                """FIXME LDO-XML or other XML format"""
                raise ScarabConnectionError

            self.log_info("accepted " + self.protocol + " connection")

        error = None
        try:
            request = self.unpickler.load()
            if self.protocol == "soap":
                # FIXME check for invalid data
                method = request['SOAP:Envelope']['SOAP:Body'].keys()[0]
                request = {
                    'object': 'root',
                    'method': method,
                    'args': [request['SOAP:Envelope']['SOAP:Body'][method]]
                }
            if self.debug >= 2:
                self.log_info("request: " + str(request), 3)
        except EOFError:
            self.log_info("EOF on connection", 2)
            self.is_valid = 0
            # FIXME close pickler (which'll close stream?)
            # FIXME notify conn manager
            return
        except:
            exc_type, value, traceback = sys.exc_info()
            # since we load picklers dynamically, we need to check their
            # errors in a dynamic way
            if str(exc_type) == "cPickle.UnpicklingError":
                self.log_info("EOF on connection", 2)
                self.is_valid = 0
                # FIXME close pickler (which'll close stream?)
                # FIXME notify conn manager
                return

            error = "error unmarshaling, closing connection: " \
                           + string.join(format_exception(exc_type, value, traceback))
            self.is_valid = 0
            # we'll still try to send a failure response, ya never know
        else:
            t = type(request)
            if t != types.DictType:
                error = "expected dictionary for request, got %s" % ` t.__name__ `
            else:
                if (not (request.has_key('object')
                         and request.has_key('method')
                         and request.has_key('args'))):
                    error = "missing object id, method name, or args in request"
                else:
                    object = request['object']
                    method_name = request['method']

                    if not self.globals.has_key(object):
                        error = "no such object registered: %s" % ` object `
                    else:
                        try:
                            method = getattr(self.globals[object], method_name)
                        except:
                            error = "no such method %s for object %s" \
                                                         % (method_name, `object`,)

        if error != None:
            self.log_info(error)
            result = {'error': error}
        else:
            call_str = object + "." + method_name
            self.log_info("calling `" + call_str + "'")
            try:
                result = apply(method, tuple(request['args']))
            except:
                exc_type, value, traceback = sys.exc_info()
                exc_str = string.join(
                    format_exception(exc_type, value, traceback))
                self.log_info("exception raised in `" + call_str + "': " +
                              exc_str)
                result = {'error': exc_str}
            else:
                self.log_info("`" + call_str + "' returned successfully", 2)
                result = {'result': [result]}

        try:
            if self.debug >= 2:
                self.log_info("response: " + str(result), 2)
            if self.protocol == "soap":
                if result.has_key('error'):
                    self.pickler.encode_fault(100, result['error'], 1)
                else:
                    self.pickler.encode_response(method_name,
                                                 result['result'][0])
            else:
                self.pickler.dump(result)
            try:
                self.pickler.flush()
            except AttributeError:
                self.stream.flush()
        except:
            exc_type, value, traceback = sys.exc_info()
            self.log_info(
                "error sending response" +
                string.join(format_exception(exc_type, value, traceback)))
            if self.is_valid:
                """FIXME ignore, log, or reraise?"""
            # else: ignore

        # FIXME if not self.is_valid: pickler.close

    def process_string(self, message):
        import StringIO
        self.stream = StringIO.StringIO(message)
        self.accept()
        del self.stream
        del self.pickler
        del self.unpickler

    def run_loop(self):
        # FIXME this is socket specific, see FIXMEs in Scarab.py
        while self.is_valid:
            self.log_info("awaiting connection", 2)
            socket, addr = self.socket.accept()
            self.caller = str(addr)
            stream = socket.makefile('r+')
            server = ScarabConnection(self)
            if self.debug >= 4:
                server.stream = ScarabDebugFile(stream, self)
            else:
                server.stream = stream
            while server.is_valid:
                server.accept()

    def log_info(self, message, level=1):
        if self.debug >= level:
            if hasattr(self, 'caller'):
                print self.caller + ": " + message
            else:
                print message
Example #28
    directory = "use it as argument"
    try:
        directory = sys.argv[1]
    except IndexError:
        pass

    d = {}

    #file_list = [f for f in glob.glob("%s/*.pisi" % directory) if not f.endswith(".delta.pisi")]
    #Arrangements for new repository structure
    file_list = []
    for dirpath, subdirs, files in os.walk(directory):
        for x in files:
            if x.endswith(".pisi") and not x.endswith("delta.pisi"):
                file_list.append(os.path.join(dirpath, x))

    for p in file_list:
        print "Processing %s.." % p
        for f in filter(lambda x: x.type == "executable",
                        pisi.package.Package(p).get_files().list):
            fpath = os.path.join("/", f.path)
            if os.access(fpath, os.X_OK):
                d[fpath] = pisi.util.split_package_filename(
                    os.path.basename(p))[0]

    o = open("../data/packages.db", "wb")
    p = cPickle.Pickler(o, protocol=2)
    p.dump(d)
    o.close()
Example #29
 def dumps(obj, protocol=None):
     file = IOtype()
     pickler = pickle.Pickler(file, protocol=PROTOCOL)
     pickler.persistent_id = _function_pickling_handler
     pickler.dump(obj)
     return file.getvalue()
Example #30
 def dumps(self, arg, bin=0):
     p = cPickle.Pickler(bin)
     p.dump(arg)
     return p.getvalue()