Example #1
def singvalsTestMat(matName):
    N = 1000

    if matName == 'lowRankMedNoiseR10':
        A = sp.load('TestMatrices/lowRankMedNoiseR10.npy')
    elif matName == 'lowRankHiNoiseR10':
        A = sp.load('TestMatrices/lowRankHiNoiseR10.npy')
    elif matName == 'polySlowR10':
        A = matrixPolyDecay(N, 1., R=10)
    elif matName == 'polyFastR10':
        A = matrixPolyDecay(N, 2., R=10)
    elif matName == 'expSlowR10':
        A = matrixExpDecay(N, 0.25, R=10)
    elif matName == 'expFastR10':
        A = matrixExpDecay(N, 1., R=10)

#   elif matName == 'SDlarge':
#       A = sp.load('TestMatrices/SD_6135obs_10years_24kParams.npy')

    if matName == 'SDlarge':
        s = sp.load('TestMatrices/singvalsSDlarge.npy')
    else:
        s = svd(A,
                full_matrices=False,
                compute_uv=True,
                overwrite_a=False,
                check_finite=True)[1]
    return s
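
The helpers matrixPolyDecay and matrixExpDecay are not defined in this snippet. A minimal sketch of what they might look like, assuming diagonal test matrices whose first R singular values are 1 and whose remaining values decay polynomially or exponentially (the exact decay conventions are assumptions, not the original implementation; the old-style sp.ones / sp.diag aliases match the surrounding examples):

import scipy as sp

def matrixPolyDecay(N, p, R=10):
    # Hypothetical: first R singular values are 1, the rest decay as i**(-p).
    s = sp.ones(N)
    s[R:] = sp.arange(1, N - R + 1, dtype=float) ** (-p)
    return sp.diag(s)

def matrixExpDecay(N, q, R=10):
    # Hypothetical: first R singular values are 1, the rest decay as 10**(-q * i).
    s = sp.ones(N)
    s[R:] = 10.0 ** (-q * sp.arange(1, N - R + 1, dtype=float))
    return sp.diag(s)
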
def svm_prastice_two():
    # Load the data
    movie_data = sp.load('movie_data.npy')
    movie_target = sp.load('movie_target.npy')
    x = movie_data
    y = movie_target

    # Split the dataset and call the TfidfVectorizer interface
    count_vec = TfidfVectorizer(binary=False,
                                decode_error='ignore',
                                stop_words='english')
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    x_train = count_vec.fit_transform(x_train)
    x_test = count_vec.transform(x_test)
    ''' svm '''
    titles = [
        'LinearSVC (linear kernel)', 'SVC with polynomial (degree 3) kernel',
        'SVC with RBF kernel', 'SVC with Sigmoid kernel'
    ]
    clf_linear = svm.SVC(kernel='linear').fit(x_train, y_train)
    clf_poly = svm.SVC(kernel='poly', degree=3).fit(x_train, y_train)
    clf_rbf = svm.SVC().fit(x_train, y_train)
    clf_sigmoid = svm.SVC(kernel='sigmoid').fit(x_train, y_train)

    for i, clf in enumerate((clf_linear, clf_poly, clf_rbf, clf_sigmoid)):
        answer = clf.predict(x_test)
        print(titles[i])
        print(clf)
        print(np.mean((answer == y_test)))
        print(answer)
        print(y_test)
def train(nn_name='12-net', k=12):
    """
    Function for training 12-net with testing on part of the data
    using cross-validation
    """
    suff = str(k)
    if nn_name.find('calib') > 0:
        X_data_name = 'train_data_icalib_' + suff + '.npy'
        y_data_name = 'labels_icalib_' + suff + '.npy'
    else:
        X_data_name = 'train_data_' + suff + '.npy'
        y_data_name = 'labels_' + suff + '.npy'

    rates12 = sp.hstack((0.05 * sp.ones(25, dtype=sp.float32),
                         0.005 * sp.ones(15, dtype=sp.float32),
                         0.0005 * sp.ones(10, dtype=sp.float32)))
    rates24 = sp.hstack((0.01 * sp.ones(25, dtype=sp.float32),
                         0.0001 * sp.ones(15, dtype=sp.float32)))
    rates48 = sp.hstack([
        0.05 * sp.ones(15, dtype=sp.float32),
        0.005 * sp.ones(10, dtype=sp.float32)
    ])
    if nn_name == '24-net':
        nn = Cnnl(
            nn_name=nn_name,
            l_rates=rates24,
            subnet=Cnnl(nn_name='12-net',
                        l_rates=rates12).load_model('12-net_lasagne_.pickle'))
    elif nn_name == '48-net':
        nn = Cnnl(nn_name=nn_name,
                  l_rates=rates48,
                  subnet=Cnnl(
                      nn_name='24-net',
                      l_rates=rates24,
                      subnet=Cnnl(
                          nn_name='12-net',
                          l_rates=rates12).load_model('12-net_lasagne_.pickle')
                  ).load_model('24-net_lasagne_.pickle'))
    else:
        nn = Cnnl(nn_name=nn_name, l_rates=rates12)
    if not os.path.exists(nn_name + '_lasagne_.pickle'):
        if nn_name.find('calib') > 0:
            ds.get_train_wider_calib_data(k=k)
        else:
            ds.get_train_data(k=k)
    X, y = sp.load(X_data_name), sp.load(y_data_name)

    X_train, y_train = X, y

    if not os.path.exists(nn_name + '_lasagne_.pickle'):
        if nn_name == '24-net':
            X_sub_train12 = sp.load('train_data_12.npy')
            nn.fit(X=X_train, y=y_train, X12=X_sub_train12)
        elif nn_name == '48-net':
            X_sub_train12 = sp.load('train_data_12.npy')
            X_sub_train24 = sp.load('train_data_24.npy')
            nn.fit(X=X_train, y=y_train, X12=X_sub_train12, X24=X_sub_train24)
        else:
            nn.fit(X=X_train, y=y_train)
        nn.save_model(nn_name + '_lasagne_.pickle')
def runTestForMat(inputs):
    [rank, matName, MainSubfolder, inneriter, OversVec, N] = inputs

    # Get specified test matrix:
    if matName == 'lowRankMedNoiseR10':
        A = sp.load('TestMatrices/lowRankMedNoiseR10.npy')
    elif matName == 'lowRankHiNoiseR10':
        A = sp.load('TestMatrices/lowRankHiNoiseR10.npy')
    elif matName == 'polySlowR10':
        A = matrixPolyDecay(N, 1., R=10)
    elif matName == 'polyFastR10':
        A = matrixPolyDecay(N, 2., R=10)
    elif matName == 'expSlowR10':
        A = matrixExpDecay(N, 0.25, R=10)
    elif matName == 'expFastR10':
        A = matrixExpDecay(N, 1., R=10)
    elif matName == 'SDlarge':
        ####        A = sp.load('TestMatrices/SD_6135obs_10years_24kParams.npy')
        # Use the following as a surrogate for the large Jacobian (SD):
        A = sp.diag(sp.load('TestMatrices/singvalsSDlarge.npy')[0:1000])

    # Create folder for saving results:
    matName = matName + '_Rank' + str(rank)
    subfolder = MainSubfolder + matName
    newpath = os.getcwd() + '/' + subfolder
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    # Run subspace iteration tests for test matrix:
    temptime = time.clock()
    main(A, rank, matName, subfolder, inneriter, OversVec, viewMax=8)
    print matName, 'Time spent on Test', time.clock() - temptime
Example #5
    def create_tag(self, mfcc_features_40):
        data_mean = mfcc_features_40.mean(axis=0).tolist()
        data_std = mfcc_features_40.std(axis=0).tolist()
        data_median = np.median(mfcc_features_40, axis=0).tolist()
        data_skev = skew(mfcc_features_40, axis=0).tolist()
        data_kurt = kurtosis(mfcc_features_40, axis=0).tolist()
        data = data_mean + data_std + data_median + data_skev + data_kurt

        coef = scipy.load(self.clust_dir + 'med_coef.npy')
        f_importances = scipy.load(self.clust_dir + 'feature_importances.npy')
        np_data = np.array(data)
        np_data = np_data / np.linalg.norm(np_data)

        datafor_lregr = np_data[f_importances > coef[1]]
        regr = joblib.load(self.clust_dir + 'lrc.pkl')
        data_regr = regr.predict_proba([datafor_lregr]).tolist()

        self.data = np_data[f_importances > coef[2]]
        data_km = []
        kmean1 = joblib.load(self.clust_dir + 'kmeans1.pkl')
        data_km += kmean1.predict([self.data]).tolist()
        kmean2 = joblib.load(self.clust_dir + 'kmeans2.pkl')
        data_km += kmean2.predict([self.data]).tolist()
        kmean3 = joblib.load(self.clust_dir + 'kmeans3.pkl')
        data_km += kmean3.predict([self.data]).tolist()
        kmean4 = joblib.load(self.clust_dir + 'kmeans4.pkl')
        data_km += kmean4.predict([self.data]).tolist()

        datafor_rfc = np_data[f_importances > coef[0]]
        rfc = joblib.load(self.clust_dir + 'rfc.pkl')
        self.ftag[rfc.predict(
            np.array([datafor_rfc.tolist() + data_regr[0] + data_km]))[0]] += 1
        self.clust = data_km[2]
Example #6
def cv(nn_name,d_num = 10000,k_fold = 7,score_metrics = 'accuracy',verbose = 0):
    suff = str(nn_name[:2])
    if nn_name.find('calib') > 0:
        X_data_name = 'train_data_icalib_'+ suff +  '.npy'
        y_data_name = 'labels_icalib_'+ suff + '.npy'
    else:
        X_data_name = 'train_data_'+ suff +  '.npy'
        y_data_name = 'labels_'+ suff + '.npy'
    X,y = sp.load(X_data_name),sp.load(y_data_name)
    d_num = min(len(X),d_num)        
    X = X[:d_num]
    y = y[:d_num] 
    rates12 = sp.hstack((0.05 * sp.ones(25, dtype=sp.float32),
                         0.005 * sp.ones(15, dtype=sp.float32),
                         0.0005 * sp.ones(10, dtype=sp.float32)))
    rates24 = sp.hstack((0.01 * sp.ones(25, dtype=sp.float32),
                         0.0001 * sp.ones(15, dtype=sp.float32)))
    rates48 = sp.hstack([0.05 * sp.ones(15, dtype=sp.float32),
                         0.005 * sp.ones(10, dtype=sp.float32)])
    if nn_name == '48-net':
        X12 = sp.load('train_data_12.npy')[:d_num]
        X24 = sp.load('train_data_24.npy')[:d_num]
    elif nn_name == '24-net':
        X12 = sp.load('train_data_12.npy')[:d_num]
        
    if score_metrics == 'accuracy':
        score_fn = accuracy_score
    else:
        score_fn = f1_score 
    scores = []
    iteration = 0
    for t_indx,v_indx in util.kfold(X,y,k_fold=k_fold):
        nn = None
        X_train,X_test,y_train,y_test = X[t_indx], X[v_indx], y[t_indx], y[v_indx]
        
        #print('\t \t',str(iteration+1),'fold out of ',str(k_fold),'\t \t' )
        if nn_name == '24-net':
            nn = Cnnl(nn_name = nn_name,l_rates=rates24,subnet=Cnnl(nn_name = '12-net',l_rates=rates12).load_model(
            '12-net_lasagne_.pickle'))
            nn.fit(X = X_train,y = y_train,X12 = X12[t_indx])
        elif nn_name == '48-net':
            nn = Cnnl(nn_name = nn_name,l_rates=rates48,subnet=Cnnl(nn_name = '24-net',l_rates=rates24,subnet=Cnnl(nn_name = '12-net',l_rates=rates12).load_model(
            '12-net_lasagne_.pickle')).load_model('24-net_lasagne_.pickle'))
            nn.fit(X = X_train,y = y_train,X12 = X12[t_indx],X24 = X24[t_indx])
        else:
            
            nn = Cnnl(nn_name = nn_name,l_rates=rates12,verbose=verbose)
            nn.fit(X = X_train,y = y_train)
    
        if nn_name == '24-net':  
            y_pred = nn.predict(X_test,X12=X12[v_indx])
        elif nn_name == '48-net':
            y_pred = nn.predict(X_test,X12=X12[v_indx],X24=X24[v_indx])
        else:
            y_pred = nn.predict(X_test)
        score = score_fn(y_test,y_pred)
        
        #print(iteration,'fold score',score)
        scores.append(score)
        iteration += 1
    score_mean = sp.array(scores).mean()
    print(d_num, 'mean score', score_mean)
    return score_mean
Example #7
    def ParseToDataContainers(self,
                              Delimiter=None):
#       Parse an input file into the DataContainers object
        DCs  = DataContainer.DataContainers()
        if(re.search('.npy',self.GetName())):
            Arrays = None
            if(self.GetboCompressed()):
                Arrays = scipy.load(self.GetDecomprName())
            else:
                Arrays = scipy.load(self.GetName())
            Header = Arrays[:,0].tolist()
            for i in xrange(len(Header)):
                Name                     = Header[i] # The names of the datacontainers are determined by the
                                                     # header column names.
                DCs.DataContainers[Name] = DataContainer.DataContainer()
                DCs.Names2Columns[Name]  = i
                DCs.Columns2Names[i]     = Name
                DCs.DataContainers[Name].SetDataArray(Arrays[i,1:])
                DCs.DataContainers[Name].SetDataName(Name)
            del Arrays
        else:
            Line = self.GetFileHandle().readline()
            if(self.GetboHeader()):
                Line  = re.sub('#','',Line)
                Names = Line.strip().split(Delimiter) # The file should be space or tab delimited!
                for i in range(len(Names)):
                    Name                     = Names[i] # The names of the datacontainers are determined by the
                                                        # header column names.
                    DCs.DataContainers[Name] = DataContainer.DataContainer()
                    DCs.Names2Columns[Name]  = i
                    DCs.Columns2Names[i]     = Name
                    DCs.DataContainers[Name].InitDataArray()
                    DCs.DataContainers[Name].SetDataName(Name)
            else:
                LSplit = Line.strip().split(Delimiter)
                for i in range(len(LSplit)):
                    Name                     = str(i)
                    DCs.DataContainers[Name] = DataContainer.DataContainer()
                    DCs.Names2Columns[Name]  = i
                    DCs.Columns2Names[i]     = Name
                    DCs.DataContainers[Name].InitDataArray()
                    DCs.DataContainers[Name].SetDataName(Name)
                    Entry = LSplit[i]
                    DCs.DataContainers[Name].AppendToArray(Entry)

            for Line in self.GetFileHandle():
                LSplit = Line.strip().split(Delimiter)
                for i in range(len(LSplit)):
                    Name  = DCs.Columns2Names[i]
                    Entry = LSplit[i]
                    DCs.DataContainers[Name].AppendToArray(Entry)

        for Key in DCs.DataContainers.iterkeys():
            DCs.DataContainers[Key].CastDataArrayToScipy() # Make scipy.arrays of the lists.

        return DCs
Example #8
def save_andor_load_arrays(endog, exog, true_params, save_arrays, load_old_arrays):
    if save_arrays:
        sp.save("endog.npy", endog)
        sp.save("exog.npy", exog)
        sp.save("true_params.npy", true_params)
    if load_old_arrays:
        endog = sp.load("endog.npy")
        exog = sp.load("exog.npy")
        true_params = sp.load("true_params.npy")
    return endog, exog, true_params
Example #9
def save_andor_load_arrays(endog, exog, true_params, save_arrays,
                           load_old_arrays):
    if save_arrays:
        sp.save('endog.npy', endog)
        sp.save('exog.npy', exog)
        sp.save('true_params.npy', true_params)
    if load_old_arrays:
        endog = sp.load('endog.npy')
        exog = sp.load('exog.npy')
        true_params = sp.load('true_params.npy')
    return endog, exog, true_params
Example #10
def save_andor_load_arrays(
        endog, exog, true_params, save_arrays, load_old_arrays):
    if save_arrays:
        sp.save('endog.npy', endog)
        sp.save('exog.npy', exog)
        sp.save('true_params.npy', true_params)
    if load_old_arrays:
        endog = sp.load('endog.npy')
        exog = sp.load('exog.npy')
        true_params = sp.load('true_params.npy')
    return endog, exog, true_params
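
A short usage sketch for the round trip implemented above, reusing the file names the function itself writes (sp.random.rand is the same scipy alias used elsewhere in these examples; the array shapes are illustrative):

import scipy as sp

endog = sp.random.rand(100)
exog = sp.random.rand(100, 3)
true_params = sp.array([1.0, -2.0, 0.5])

# First run: persist the freshly generated arrays.
endog, exog, true_params = save_andor_load_arrays(
    endog, exog, true_params, save_arrays=True, load_old_arrays=False)

# Later run: discard the in-memory values and reload the saved ones.
endog, exog, true_params = save_andor_load_arrays(
    endog, exog, true_params, save_arrays=False, load_old_arrays=True)
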
Example #11
 def load(cls, dirname):
     import json
     model = cls()
     params = json.load(open("{}/params.json".format(dirname)))
     model.alpha = params["alpha"]
     model.beta = params["beta"]
     model.n_topics = params["n_topics"]
     model.n_docs = params["n_docs"]
     model.n_words = params["n_words"]
     model.B = params["B"]
     model.phi = sp.load("{}/phi.npy".format(dirname))
     model.theta = sp.load("{}/theta.npy".format(dirname))
     return model
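
Only the load side is shown above. A hedged sketch of a matching save method, assuming the same on-disk layout (params.json next to phi.npy and theta.npy) and that the listed attributes exist on the model:

 def save(self, dirname):
     import json
     import os
     os.makedirs(dirname, exist_ok=True)
     params = {"alpha": self.alpha, "beta": self.beta,
               "n_topics": self.n_topics, "n_docs": self.n_docs,
               "n_words": self.n_words, "B": self.B}
     with open("{}/params.json".format(dirname), "w") as f:
         json.dump(params, f)
     sp.save("{}/phi.npy".format(dirname), self.phi)
     sp.save("{}/theta.npy".format(dirname), self.theta)
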
Example #12
	def execute(self, nprocesses=1):

		params = self.params
		boxshape = params['boxshape']
		boxunit = params['boxunit']
		resultf = params['hr'][0]
		if len(params['last']) != 0:
			resultf = resultf + params['last'][0]
		resultf = resultf + '-' + params['hr'][1]
		if len(params['last']) != 0:
			resultf = resultf + params['last'][1]
		
		FKPweight = params['FKPweight']
		in_root = params['input_root']
		out_root = params['output_root']
		mid = params['mid']
		fkpp = params['FKPpk']


		WindowF_fname = out_root+'WindowF_'+\
			str(boxshape[0])+'x'+str(boxshape[1])+'x'+\
			str(boxshape[2])+'x'+str(boxunit)+'_'+resultf
		kWindowF_fname = out_root+'k_WindowF_'+\
			str(boxshape[0])+'x'+str(boxshape[1])+'x'+\
			str(boxshape[2])+'x'+str(boxunit)+'_'+resultf

		print WindowF_fname

		try:
			WindowF = sp.load(WindowF_fname+'.npy')
			k = sp.load(kWindowF_fname+'.npy')
		except IOError:
			print '\tWindow Function ReMake'
			WindowF, k = self.GetWindowFunctionData()

		non0 = WindowF.nonzero()
		sp.save(WindowF_fname, WindowF)
		sp.save(kWindowF_fname, k)

		#txtf = open(out_root+'window_for_idl.txt', 'w')
		#try:
		#	for i in range(len(WindowF)):
		#		if WindowF[i]==0: continue
		#		print >>txtf, '{0} {1}'.format(k[i], WindowF[i])
		#finally:
		#	txtf.close()

		return WindowF, k
Example #13
def make_video(image_dir, filename="vidout.avi", fixation_file=None):

    MPEG_FOURCC = 827148624
    vwriter = cv2.VideoWriter()

    if fixation_file is not None:
        fixations = sp.load(fixation_file)
        fixations[sp.isnan(fixations)] = -100
        fixations[abs(fixations) > 1000] = 1000
    else:
        fixations = []

    im_base_name = "cam1_frame_"
    im_extension = ".bmp"

    suc = vwriter.open(os.path.join(image_dir, filename), cv.CV_FOURCC('M', 'J', 'P', 'G'), 30, (640,480))

    if not suc:
        raise IOError("Failed to open movie")

    for frame_num in xrange(1000):
        im_name = "".join([im_base_name, str(frame_num), im_extension])
        im_path = os.path.join(image_dir, im_name)
        im = cv2.imread(im_path)
        if len(fixations) != 0:
            cv2.circle(im, tuple(fixations[frame_num]), 3, (255, 255, 255))
        vwriter.write(im)
Example #14
def loadTFIDF(path):

    weight=sp.load('tfidf_weight.npy')
    fp=codecs.open('tfidf_words.txt','r','utf-8')
    words=json.load(fp)
    fp.close()    
    return words,weight
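
For completeness, a possible counterpart that writes the two files loadTFIDF expects; the name saveTFIDF and its argument layout are assumptions, not part of the original module:

import codecs
import json
import scipy as sp

def saveTFIDF(words, weight):
    # words: list of feature names, weight: dense tf-idf matrix.
    sp.save('tfidf_weight.npy', weight)
    fp = codecs.open('tfidf_words.txt', 'w', 'utf-8')
    json.dump(words, fp)
    fp.close()
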
Example #15
def process_newton_args(args, log):
    log("Newton Solver Setup")
    log("-------------------")
    mcmcsteps = args.mcsteps  
    log("Running {} Newton-MCMC rounds".format(mcmcsteps))

    param = {'mcmcsteps': args.mcsteps,
             'newtonSteps': args.newtonsteps,
             'gamma0': args.gamma,
             'pcdamping': args.damping,
             'jclamp': args.jclamp,
             'resetseqs': args.resetseqs,
             'preopt': args.preopt }
    p = attrdict(param)

    cutoffstr = ('dJ clamp {}'.format(p.jclamp) if p.jclamp != 0 
                 else 'no dJ clamp')
    log(("Updating J locally with gamma = {}, {}, and pc-damping {}. "
         "Running {} Newton update steps per round.").format(
          p.gamma0, cutoffstr, p.pcdamping, p.newtonSteps))

    log("Reading target marginals from file {}".format(args.bimarg))
    bimarg = scipy.load(args.bimarg)
    if bimarg.dtype != dtype('<f4'):
        raise Exception("Bimarg in wrong format")
        #could convert, but this helps warn that something may be wrong
    if any(~((bimarg.flatten() >= 0) & (bimarg.flatten() <= 1))):
        raise Exception("Bimarg must be nonzero and 0 < f < 1")
    log("Target Marginals: " + printsome(bimarg) + "...")

    log("")
    p['bimarg'] = bimarg
    return p
Example #16
    def load(self, filename):
        r'''
        Loads a previously saved object's data onto new, empty Generic object

        Parameters
        ----------
        filename : string
            The file containing the saved object data in Numpy zip format (npz)

        Examples
        --------
        >>> pn = OpenPNM.Network.Cubic(shape=[3,3,3])
        >>> pn.save('test_pn')

        >>> gn = OpenPNM.Network.GenericNetwork()
        >>> gn.load('test_pn')  
        
        >>> # Remove newly created file
        >>> import os
        >>> os.remove('test_pn.npz')

        '''
        if (self.Np == 0) and (self.Nt == 0):
            filename = filename.split('.')[0] + '.npz'
            temp = sp.load(filename)
            data_dict = temp['data'].item()
            info_dict = temp['info'].item()
            self.update(data_dict)
            self._name = info_dict['name']
            temp.close()
        else:
            raise Exception('Cannot load saved data onto an active object')
Example #17
    def test_BCSSTK15(self):
        '''
        A uses BCSSTK15, found at https://math.nist.gov/MatrixMarket/data/Harwell-Boeing/bcsstruc2/bcsstk15.html
        x is generated by scipy.random.rand

        3948 x 3948, 60882 entries, 99.61% sparsity
        '''
        data = sp.load('./BCSSTK15.npz')

        self._test_cg(self.test_BCSSTK15.__name__, linalg.solve, data['A'],
                      data['b'], data['x'])
        self._test_cg(self.test_BCSSTK15.__name__, BookCG, data['A'],
                      data['b'], data['x'])
        self._test_cg(self.test_BCSSTK15.__name__, WikiCG, data['A'],
                      data['b'], data['x'])
        # scipy.sparse.linalg.cg needs about 1.5 hours to benchmark it
        # self._test_cg(self.test_BCSSTK15.__name__, ScipyCG, csc_matrix(data['A']), data['b'], data['x'])
        self._test_cg(self.test_BCSSTK15.__name__, ScipyCGS,
                      csc_matrix(data['A']), data['b'], data['x'])
        # scipy.sparse.linalg.bicg needs about 2.6 hours to benchmark it
        # self._test_cg(self.test_BCSSTK15.__name__, ScipyBicG, csc_matrix(data['A']), data['b'], data['x'])
        self._test_cg(self.test_BCSSTK15.__name__, ScipyBicGStab,
                      csc_matrix(data['A']), data['b'], data['x'])
        self._test_cg(self.test_BCSSTK15.__name__, ScipySpSolve,
                      csc_matrix(data['A']), data['b'], data['x'])
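
The test relies on a prebuilt ./BCSSTK15.npz. One way such a file could have been produced from the Matrix Market download is sketched below; building b = A x from a random x is an assumption consistent with the docstring:

import numpy as np
import scipy as sp
from scipy.io import mmread

A = mmread('bcsstk15.mtx').toarray()   # 3948 x 3948 stiffness matrix, densified
x = sp.random.rand(A.shape[0])         # docstring: x generated by scipy.random.rand
b = A.dot(x)
np.savez('BCSSTK15.npz', A=A, b=b, x=x)
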
Example #18
def load_feature_matrix(src, dtype=sp.float32):
    if src.endswith(".npz"):
        return smat.load_npz(src).tocsr().astype(dtype)
    elif src.endswith(".npy"):
        return smat.csr_matrix(sp.ascontiguousarray(sp.load(src), dtype=dtype))
    else:
        raise ValueError("src must end with .npz or .npy")
Example #19
    def loadMatrix(self, fname):
        """ Loads the free Hamiltonian and potential matrices from file """

        f = scipy.load(fname)
        self.L = f['arr_0'].item()
        self.m = f['arr_1'].item()

        Emax = f['arr_2'].item()
        nmax = f['arr_3'].item()
        bcs = f['arr_4'].item()

        self.buildFullBasis(L=self.L, m=self.m, Emax=Emax, bcs=bcs)

        basisI = self.fullBasis
        basisJ = self.fullBasis

        print(basisI.size)

        self.h0 = Matrix(
            basisI, basisJ,
            scipy.sparse.coo_matrix((f['arr_5'], (f['arr_6'], f['arr_7'])),
                                    shape=(basisI.size, basisJ.size)))

        self.potential = Matrix(
            basisI, basisJ,
            scipy.sparse.coo_matrix((f['arr_8'], (f['arr_9'], f['arr_10'])),
                                    shape=(basisI.size, basisJ.size)))
Example #20
    def from_file(fname):
        """Load model from a npz file"""

        params = dict(sc.load(fname).items())
        model = Model(fname, **params)
        if "seed" in params:
            model.set_seed(model["seed"])
        return model
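
Here sc.load opens the .npz archive lazily, and dict(sc.load(fname).items()) materializes every stored array into the keyword dict passed to Model. The matching writer is numpy.savez, as in this sketch (the parameter names are illustrative):

import numpy as np

# Each keyword becomes one array in the archive, so the load side recovers
# {"k": ..., "d": ..., "seed": ...} via dict(sc.load(fname).items()).
np.savez("model.npz", k=np.array(3), d=np.array(10), seed=np.array(42))
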
Example #21
 def wrapper(cls, path, **kwargs):
     state = {}
     try:
         state = scipy.load(path, allow_pickle=True)[()]
     except IOError:
         raise IOError('Invalid path: {}.'.format(path))
     cls.logger.info('Loading {}: {}.'.format(cls.__name__, path))
     return loadstateclass(func)(cls, state, **kwargs)
Example #22
 def load(cls, path):
     state = {}
     try:
         state = scipy.load(path)[()]
     except IOError:
         raise IOError('Invalid path: {}.'.format(path))
     cls.logger.info('Loading {}: {}.'.format(cls.__name__, path))
     return cls.loadstate(state)
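
The trailing [()] is the usual trick for recovering a Python object stored with numpy's save: saving a dict produces a 0-d object array, and indexing it with an empty tuple returns the original object. A minimal sketch of the corresponding save side, assuming the state is a plain dict:

import numpy as np

state = {'weights': np.zeros(3), 'step': 7}
np.save('state.npy', state, allow_pickle=True)           # stored as a 0-d object array

restored = np.load('state.npy', allow_pickle=True)[()]   # back to the original dict
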
def load_est_data_VA(data_flag, IC):

    in_dir = '%s/objects/%s' % (DATA_DIR, data_flag)

    with gzip.open('%s/obj_IC=%s.pklz' % (in_dir, IC), 'rb') as f:
        obj = cPickle.load(f)
    params = sp.load('%s/params_IC=%s.npy' % (in_dir, IC))
    paths = sp.load('%s/paths_IC=%s.npy' % (in_dir, IC))
    errors = sp.load('%s/action_errors_IC=%s.npy' % (in_dir, IC))

    est_dict = dict()
    est_dict['obj'] = obj
    est_dict['params'] = params
    est_dict['paths'] = paths
    est_dict['errors'] = errors

    return est_dict
Example #25
def test_brown_clustering():
    fname = "test-data/text-1e2.npz"
    F = sc.load( fname )
    C, D = F['C'], F['D']
    k = 100
    W = 1000

    bc = BrownClusteringAlgorithm( C )
    bc.run( k, W )
def train(nn_name = '12-net',k = 12):
    """
    Function for training 12-net with testing on part of the data
    using cross-validation
    """
    suff = str(k)
    if nn_name.find('calib') > 0:
        X_data_name = 'train_data_icalib_'+ suff +  '.npy'
        y_data_name = 'labels_icalib_'+ suff + '.npy'
    else:
        X_data_name = 'train_data_'+ suff +  '.npy'
        y_data_name = 'labels_'+ suff + '.npy'
    
    rates12 = sp.hstack((0.05 * sp.ones(25, dtype=sp.float32),
                         0.005 * sp.ones(15, dtype=sp.float32),
                         0.0005 * sp.ones(10, dtype=sp.float32)))
    rates24 = sp.hstack((0.01 * sp.ones(25, dtype=sp.float32),
                         0.0001 * sp.ones(15, dtype=sp.float32)))
    rates48 = sp.hstack([0.05 * sp.ones(15, dtype=sp.float32),
                         0.005 * sp.ones(10, dtype=sp.float32)])
    if nn_name == '24-net':
        nn = Cnnl(nn_name = nn_name,l_rates=rates24,subnet=Cnnl(nn_name = '12-net',l_rates=rates12).load_model(
            '12-net_lasagne_.pickle'))
    elif nn_name == '48-net':    
        nn = Cnnl(nn_name = nn_name,l_rates=rates48,subnet=Cnnl(nn_name = '24-net',l_rates=rates24,subnet=Cnnl(nn_name = '12-net',l_rates=rates12).load_model(
            '12-net_lasagne_.pickle')).load_model('24-net_lasagne_.pickle'))     
    else:
        nn = Cnnl(nn_name = nn_name,l_rates=rates12)
    if not os.path.exists(nn_name   + '_lasagne_.pickle'): 
        if nn_name.find('calib') > 0:
            ds.get_train_wider_calib_data(k=k)  
        else:
            ds.get_train_data(k=k)
    X,y = sp.load(X_data_name),sp.load(y_data_name)
        
    X_train,y_train = X,y
    
    if not os.path.exists(nn_name   + '_lasagne_.pickle'):
        if nn_name == '24-net':
            X_sub_train12 = sp.load('train_data_12.npy')
            nn.fit(X = X_train,y = y_train,X12 = X_sub_train12)
        elif nn_name == '48-net':
            X_sub_train12 = sp.load('train_data_12.npy')
            X_sub_train24 = sp.load('train_data_24.npy')
            nn.fit(X = X_train,y = y_train,X12 = X_sub_train12,X24 = X_sub_train24)
        else:
            nn.fit(X = X_train,y = y_train)
        nn.save_model(nn_name   + '_lasagne_.pickle')
Example #27
def getCouplings(args, L, nB, bimarg, log):
    couplings = None

    if args.seqmodel and args.seqmodel in ['zero', 'logscore']:
        args.couplings = args.seqmodel

    if args.couplings is not None:
        #first try to generate couplings (requires L, nB)
        if args.couplings in ['zero', 'logscore']:
            if L is None: # we are sure to have nB
                raise Exception("Need L to generate couplings")
        if args.couplings == 'zero':
            log("Setting Initial couplings to 0")
            couplings = zeros((L*(L-1)/2, nB*nB), dtype='<f4')
        elif args.couplings == 'logscore':
            log("Setting Initial couplings to Independent Log Scores")
            if bimarg is None:
                raise Exception("Need bivariate marginals to generate "
                                "logscore couplings")
            h = -np.log(getUnimarg(bimarg))
            J = zeros((L*(L-1)/2,nB*nB), dtype='<f4')
            couplings = fieldlessGaugeEven(h, J)[1]
        else: #otherwise load them from file
            log("Reading couplings from file {}".format(args.couplings))
            couplings = scipy.load(args.couplings)
            if couplings.dtype != dtype('<f4'):
                raise Exception("Couplings must be in 'f4' format")
    elif args.seqmodel and args.seqmodel not in ['zero', 'logscore']:
        # and otherwise try to load them from model directory
        fn = os.path.join(args.seqmodel, 'J.npy')
        if os.path.isfile(fn):
            log("Reading couplings from file {}".format(fn))
            couplings = scipy.load(fn)
            if couplings.dtype != dtype('<f4'):
                raise Exception("Couplings must be in 'f4' format")
    L2, nB2 = seqsize_from_param_shape(couplings.shape)
    L, nB = updateLnB(L, nB, L2, nB2, 'couplings')

    if couplings is None:
        raise Exception("Could not find couplings. Use either the "
                        "'couplings' or 'seqmodel' options.")

    return couplings, L, nB
Example #28
def main():
    """
    Naive Bayes implementation
    """
    # Load the data
    movies_reviews = load_files("./data/tokens")
    sp.save('./data/movie_data.npy', movies_reviews.data)
    sp.save('./data/movie_target.npy', movies_reviews.target)

    movie_data = sp.load('./data/movie_data.npy')
    movie_target = sp.load('./data/movie_target.npy')
    x = movie_data
    y = movie_target

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

    count_vec = TfidfVectorizer(binary=False, decode_error='ignore', stop_words="english")
    # Vectorize the training data
    x_train = count_vec.fit_transform(x_train)
    x_test = count_vec.transform(x_test)

    # Classifier
    clf = MultinomialNB().fit(x_train, y_train)
    # doc_pred = clf.predict(x_test)
    # print("平均值:", np.mean(doc_pred == y_test))
    # 可用 clf.score 代替以上均值
    
    score = clf.score(x_test, y_test)
    print("score:",score)

    # Precision and recall
    precision, recall, thresholds = precision_recall_curve(
        y_test, clf.predict(x_test))

    answer = clf.predict_proba(x_test)[:, 1]
    report = answer > 0.5
    print(classification_report(y_test, report, target_names=['net', 'pos']))

    # Feature names
    # print(count_vec.get_feature_names())
    # Save the model
    model_path =  "./models/clf_bayes.model"
    joblib.dump(clf, model_path, compress=0)
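
Reusing the persisted classifier later also requires the fitted vectorizer; the example above only dumps the classifier, so persisting count_vec as well is an assumption in this sketch:

import joblib

# Sketch: persist the vectorizer next to the classifier ...
joblib.dump(count_vec, "./models/count_vec.model")

# ... then, in a later session, reload both and score a new review.
clf = joblib.load("./models/clf_bayes.model")
count_vec = joblib.load("./models/count_vec.model")
x_new = count_vec.transform(["a surprisingly touching and well acted film"])
print(clf.predict(x_new))
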
Example #29
def full_image(i=None):
    global _full
    if _full is None:
        path = Path('cache/full.npy')
        if not path.exists():
            ims = [_full_image(i) for i in range(1, COUNTS['full'] + 1)]
            sp.save(path, sp.stack(ims))
        _full = sp.load(path)
    ims = _full[i - 1] if i is not None else _full
    return ims
Example #30
    def from_file( fname ):
        """Load model from a HDF file"""

        if not fname.endswith(".npz"):
            fname += ".npz"
        params = dict( sc.load( fname ).items() )
        model = Model( fname, **params )
        if "seed" in params:
            model.set_seed( model.get_parameter("seed") )
        return model
Example #31
 def calc_dispersion_npy(self, src, dst, axis=1):
     data = sp.load(src)
     background = data[0, :, :, :, :]
     data = data - background[None, :, :, :, :]
     disp = sp.sum(sp.absolute(
         fftpack.fftshift(fftpack.fft2(data, axes=(0, axis)),
                          axes=(0, axis))),
                   axis=tuple([a for a in range(5) if a not in (axis, 0)]))
     sp.save(dst, disp)
     return 0
def returnABmatsConFluxesEnthalpies(NEQ, strlcount, timestepnr, timestepnrB):
    # Open and read adjoint file:
    if timestepnr == -1:  # Read nat state file
        adjfile = h5py.File('natFWD' + strlcount + '.h5', 'r')
    else:  # Read production file
        adjfile = h5py.File('prodFWD' + strlcount + '.h5', 'r')
    # Read current time-step from file:
    dt = adjfile['fulltimes']['DELTEX'][timestepnr]

    fwdrows = sp.load('fwdrows.npy')
    fwdcols = sp.load('fwdcols.npy')
    accrows = sp.load('accrows.npy')
    acccols = sp.load('acccols.npy')
    zerosAmat = sp.load('zerosAmat.npy')
    onesAmat = sp.load('onesAmat.npy')
    zerosBmat = sp.load('zerosBmat.npy')

    # Assemble forward matrix from file for the latest time-step:
    fwdels = adjfile['adjoint/fwdmat/co'][:, timestepnr]
    # Adjust values for large or constant boundary blocks:
    fwdels[zerosAmat] = 0.
    fwdels[onesAmat] = 1.
    A = csr_matrix((fwdels, (fwdrows, fwdcols)),
                   shape=(NEQ, NEQ))  # FWD matrix

    if (timestepnrB == -1) and (timestepnr == 0):
        # First production time-step using direct method
        # Read B matrix from last natural state time:
        natadjfile = h5py.File('natFWD' + strlcount + '.h5', 'r')
        accels = natadjfile['adjoint/accmat/co'][:, timestepnrB]
        natadjfile.close()
    else:
        accels = adjfile['adjoint/accmat/co'][:, timestepnrB]
    # Adjust values for large or constant boundary blocks:
    zerosBmat = sp.load('zerosBmat.npy')
    # Adjust values for large or constant boundary blocks:
    accels[zerosBmat] = 0.
    B = csr_matrix((accels, (accrows, acccols)),
                   shape=(NEQ, NEQ))  # ACC matrix

    # Read flux information at block interfaces:
    fluxw = adjfile['adjoint/flowenth'][
        timestepnr, :, 2]  # for storing liquid phase interface fluxes
    enthw = adjfile['adjoint/flowenth'][
        timestepnr, :, 3]  # for storing liquid phase interface enthalpies
    fluxg = adjfile['adjoint/flowenth'][
        timestepnr, :, 0]  # for storing vapour phase interface fluxes
    enthg = adjfile['adjoint/flowenth'][
        timestepnr, :, 1]  # for storing vapour phase interface enthalpies

    adjfile.close()

    return dt, A, B, fluxw, enthw, fluxg, enthg
Example #33
def draw_raw_signal_around_genes(raw_signals, out_png, windowsize=20000):
    """draw the raw signals as computed by make_raw_signal_around_genes"""
    gene_expr = filter(lambda f: 'gene_expr' in f, raw_signals)
    reads = filter(lambda f: 'gene_expr' not in f and 'matched_size' not in f, raw_signals)
    pyplot.figure()
    f, plots = pyplot.subplots(1, len(reads)+1, sharex=False, sharey=True, squeeze=False)
    #sig_min = reduce(min, map(min, map(sp.load, reads)))
    #sig_max = reduce(max, map(max, map(sp.load, reads)))
    for i, read_sig in enumerate(reads):
        #plots[i+1].imshow(sp.load(read_sig), interpolation='nearest', vmin=sig_min, vmax=sig_max)
        plots[0, i+1].imshow(sp.ma.filled(sp.load(read_sig), fill_value=0).T, interpolation='nearest', aspect=.05)
        plots[0, i+1].text(0,0,read_sig.split('gene.expression.')[1].split('.')[0], rotation=30, verticalalignment='bottom')
    gexpr_ma = sp.load(gene_expr[0]).astype(float)
    plots[0, 0].imshow(sp.ma.filled(gexpr_ma.reshape(1,gexpr_ma.shape[0]), fill_value=0).T, interpolation='nearest', aspect=.002)
    #yticks(sp.arange())
    shape = sp.load(read_sig).shape
    pyplot.xticks(sp.arange(0, shape[0] + shape[0]/4, shape[0] / 4), sp.arange(-windowsize/2, windowsize/2 + windowsize/4, windowsize/4))
    f.savefig(out_png)
    pyplot.close('all')
def AandBmatZeroOneFill(h5filename):
    # Open and read HDF5 output file:
    # (the natural state file may typically be smaller than the production file)
    adjfile = h5py.File(h5filename, 'r')

    # IRN (forward matrix row indices), ICN (forward matrix column indices) as an array
    fwdrows = adjfile[
        'adjoint/fwdmat/irnicn'][:,
                                 0] - 1  # Minus 1 to convert indexing from FORTRAN to Python
    fwdcols = adjfile[
        'adjoint/fwdmat/irnicn'][:,
                                 1] - 1  # Minus 1 to convert indexing from FORTRAN to Python
    sp.save('fwdrows.npy', fwdrows)
    sp.save('fwdcols.npy', fwdcols)
    # IRN (accumulation matrix row indices), ICN (accumulation matrix column indices) as an array
    accrows = adjfile[
        'adjoint/accmat/irnicn'][:,
                                 0] - 1  # Minus 1 to convert indexing from FORTRAN to Python
    acccols = adjfile[
        'adjoint/accmat/irnicn'][:,
                                 1] - 1  # Minus 1 to convert indexing from FORTRAN to Python
    sp.save('accrows.npy', accrows)
    sp.save('acccols.npy', acccols)

    # Indices for large constant boundary blocks:
    VOLS = sp.load('Volumes.npy')
    zerosAmat = []  # Indices for FWD matrix elements to be set to zero
    onesAmat = []  # Indices for FWD matrix elements to be set to one
    for i in range(0, len(fwdrows)):
        irn = fwdrows[i]
        icn = fwdcols[i]
        rVol = VOLS[int(
            irn / 2
        )]  # SHOULD PROBABLY USE NK1 instead of 2 ???? !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        cVol = VOLS[int(icn / 2)]
        if (rVol > 1.E20) or (rVol < 0.) or (cVol > 1.E20) or (cVol < 0.):
            if (irn == icn):
                onesAmat.extend([i])
            else:
                zerosAmat.extend([i])
    zerosBmat = []  #  Indices for ACC matrix elements to be set to zero
    for i in range(0, len(accrows)):
        irn = accrows[i]
        icn = acccols[i]
        rVol = VOLS[int(
            irn / 2
        )]  # SHOULD PROBABLY USE NK1 instead of 2 ???? !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        cVol = VOLS[int(icn / 2)]
        if (rVol > 1.E20) or (rVol < 0.) or (cVol > 1.E20) or (cVol < 0.):
            zerosBmat.extend([i])
    sp.save('zerosAmat.npy', zerosAmat)
    sp.save('onesAmat.npy', onesAmat)
    sp.save('zerosBmat.npy', zerosBmat)
    adjfile.close()
    return
def loadMSER_npy(fn=nn_data_sets.NN_DATA_MSER,datadir=NN_DATA_DIR):
    '''
    As a shortcut to loading the MSER data set from the 1000's of files found in the
    ukbench_extract folder, one should call loadMSER() once, and save the resulting
    numpy array to a single file. This function assumes you have done so, and will
    load the MSER data from the specified numpy file.
    @Note: This function really doesn't do anything but put comments around the
    use of numpy.load(...). Use numpy.save( filename, M) to create the saved file
    in the first place.
    '''
    return scipy.load( os.path.join(datadir, fn) )
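
The one-time conversion the docstring refers to might look like the following sketch; loadMSER and the module-level constants are assumed to come from the same package:

import os
import numpy

# Build the MSER matrix once from the ukbench_extract files,
# then cache it as a single .npy file for loadMSER_npy to pick up.
M = loadMSER()
numpy.save(os.path.join(NN_DATA_DIR, nn_data_sets.NN_DATA_MSER), M)
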
Example #36
def read_file(name):
    fname = fname_template %name
    if os.path.exists(fname+'_x.npy') and os.path.exists(fname+'_y.npy'):
        xs = scipy.load(fname+'_x.npy')
        ys = scipy.load(fname+'_y.npy')
        return xs, ys
    elif os.path.exists(fname):
        with open(fname) as fh:
            lines = fh.readlines()
        ns = len(lines)
        x, y = numpy.ndarray(ns), numpy.ndarray(ns, dtype=complex)
        for i, l in enumerate(lines):
            xr, yr = l.split('\t')
            x[i] = eval(xr)
            y[i] = eval(yr)
        return x, y
    else:
        print >>sys.stderr, 'File %s is missing' % (fname_template % name,)
        #sys.exit(1)
        return None, None
Example #37
def read_file(name):
    fname = fname_template % name
    if os.path.exists(fname + '_x.npy') and os.path.exists(fname + '_y.npy'):
        xs = scipy.load(fname + '_x.npy')
        ys = scipy.load(fname + '_y.npy')
        return xs, ys
    elif os.path.exists(fname):
        with open(fname) as fh:
            lines = fh.readlines()
        ns = len(lines)
        x, y = numpy.ndarray(ns), numpy.ndarray(ns, dtype=complex)
        for i, l in enumerate(lines):
            xr, yr = l.split('\t')
            x[i] = eval(xr)
            y[i] = eval(yr)
        return x, y
    else:
        print >> sys.stderr, 'File %s is missing' % (fname_template % name, )
        #sys.exit(1)
        return None, None
Example #38
    def get_train_data(n_pos=31929, n_neg=164863, k=12):
        '''
        Merge positive and negative examples
        '''
        suff = str(k)
        X_name = 'train_data_' + suff + '.npy'
        y_name = 'labels_' + suff + '.npy'
        if not (os.path.exists(X_name) and os.path.exists(y_name)):
            X_train_face, y_train_face = Datasets.get_train_face_wider_data(
                k=k)
            #X_pos = X_train_face[y_train_face==1]
            X_pos = X_train_face
            X_aflw, y_train_face_aflw = Datasets.get_aflw_face_data(k=k)
            X_pos = sp.vstack([X_pos, X_aflw])
            X_train_non_face, y_train_non_face = Datasets.get_train_non_face_data(
                k=k)
            print('c1_pos:', len(X_pos))
            if len(X_train_face[y_train_face == 0]) > 0:
                X_neg = sp.vstack(
                    (X_train_face[y_train_face == 0], X_train_non_face))
            else:
                X_neg = X_train_non_face
            X_pos = shuffle(X_pos, random_state=42)
            X_neg = shuffle(X_neg, random_state=42)
            X_pos = X_pos[:n_pos]
            X_neg = X_neg[:n_neg]

            n_neg = len(X_neg)
            n_pos = len(X_pos)
            y_pos = sp.ones(n_pos, int)
            y_neg = sp.zeros(n_neg, int)
            X = sp.vstack((X_pos, X_neg))
            y = sp.hstack((y_pos, y_neg))
            X, y = shuffle(X, y, random_state=42)
            sp.save(X_name, X)
            sp.save(y_name, y)
        else:
            X = sp.load(X_name)
            y = sp.load(y_name)
        print("Done", "Positive examples count, Negative exapmples count:",
              len(y[y == 1]), len(y[y == 0]))
Example #39
def resample(name='NAO1', fs=2., **kwargs):
	# load the data
	datpath = '../output/'
	datname = 'nao_indices_2013-10-05_minmax.npz'
	data = sp.load(datpath + datname)
	NAO = data[name].tolist()
	# interpolate the index to an evenly sampled time grid
	fs = 4. # := sampling frq = 4 => 4 samples/yr => inter-sample time = 0.25 yr
	f_nao1 = interp1d(NAO['time'].squeeze(), NAO['index'].squeeze())
	t_nao1_new = sp.arange(NAO['time'].min(), NAO['time'].max() + 0.1, 1/fs)
	y_nao1_new = f_nao1(t_nao1_new)
	return t_nao1_new, y_nao1_new
def load_def_data(path_to_model, file_name_prefix, iter_type=None, iter_val=None):
    """Load saved def_data for the batch."""
    if iter_type=='_norm':
        file_name_data   = file_name_prefix + '_data' +iter_type+'_%.1f.npz'%iter_val
    elif iter_type=='_iter':
        file_name_data   = file_name_prefix + '_data' +iter_type+'_%i.npz'%iter_val        
    else:
        print('error - fix load_batch func!')

    data = scipy.load(path_to_model+file_name_data)['a'][()]

    return data
Example #41
def read_fft(genre_list, base_dir=GENRE_DIR):
    X = []
    y = []
    labels = []
    for label, genre in enumerate(genre_list):
        genre_dir = os.path.join(base_dir, genre, "*.fft.npy")
        file_list = glob.glob(genre_dir)
        for fn in file_list:
            fft_features = scipy.load(fn)
            X.append(fft_features[:1000])
            y.append(label)
    return np.array(X), np.array(y)
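
read_fft assumes the *.fft.npy feature files already exist. A sketch of how such files are typically produced, one FFT magnitude vector per wav file (the helper name create_fft is an assumption):

import os
import numpy as np
from scipy.io import wavfile

def create_fft(wav_file):
    # Cache the magnitudes of the first 1000 FFT coefficients next to the wav file.
    sample_rate, X = wavfile.read(wav_file)
    fft_features = abs(np.fft.fft(X)[:1000])
    base, _ = os.path.splitext(wav_file)
    np.save(base + ".fft", fft_features)   # numpy appends .npy -> <name>.fft.npy
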
Example #42
def quadrant_image(i=None):
    global _quadrant
    if _quadrant is None:
        path = Path('cache/quadrant.npy')
        if not path.exists():
            ims = [
                _quadrant_image(i) for i in range(1, COUNTS['quadrant'] + 1)
            ]
            sp.save(path, sp.stack(ims))
        _quadrant = sp.load(path)
    ims = _quadrant[i - 1] if i is not None else _quadrant
    return ims
def regjac(NRadj):
    NRadjHalf = NRadj / 2
    mpr = sp.load('mprior.npy')
    Npr = len(mpr)

    Nregsmooth = 2 * 15820  # 2 times the number of connections between adjustable rock-types
    Nreglocalxz = 8000  # Number of adjustable rock-types
    Nreg = Nregsmooth + Nreglocalxz + Npr
    sp.save('Nreg.npy', Nreg)

    rJac = lil_matrix((Nreg, NRadj))
    x = 0

    # Create horizontal smoothing of log10kx (perm index 1):
    for i in range(0, 80):
        for j in range(0, 99):
            rJac[x, j + i * 100] = 1
            rJac[x, j + i * 100 + 1] = -1
            x += 1

    # Create vertical smoothing of log10kx (perm index 1):
    for i in range(0, 79):
        for j in range(0, 100):
            rJac[x, j + i * 100] = 1
            rJac[x, j + (i + 1) * 100] = -1
            x += 1

    # Create horizontal smoothing of log10kz (perm index 3):
    for i in range(0, 80):
        for j in range(0, 99):
            rJac[x, j + i * 100 + NRadjHalf] = 1
            rJac[x, j + i * 100 + 1 + NRadjHalf] = -1
            x += 1

    ## Create vertical smoothing of log10kz (perm index 3):
    for i in range(0, 79):
        for j in range(0, 100):
            rJac[x, j + i * 100 + NRadjHalf] = 1
            rJac[x, j + (i + 1) * 100 + NRadjHalf] = -1
            x += 1

    # Add regularization to make log10kx similar to log10kz:
    for i in range(0, Nreglocalxz):
        rJac[x, i] = 1
        rJac[x, i + NRadjHalf] = -1
        x += 1

    # Add prior paramater regularisation:
    for i in range(0, Npr):
        rJac[x, i] = 1 * 0.001
        x += 1

    return csr_matrix(rJac)
Example #44
def read_fft_features(path, value):
    features = []
    t = time.time()
    for filename in os.listdir(path):
        if filename.endswith(".npy"):
            real_path = "{0}/{1}".format(path, filename)
            fft_features = scipy.load(real_path)
            features.append(np.asarray(fft_features[:1000]))
    print time.time() - t
    length = len(features)
    data = np.array(features)
    values = np.array([value] * length)
    return data, values
def read_fft(genre_list, base_dir):
	X = []
	y = []
	for label, genre in enumerate(genre_list):
		
		genre_dir = os.path.join(base_dir, genre, "*.fft.npy")
		# print genre_dir
		file_list =glob.glob(genre_dir)
		for fn in file_list:
			fft_features = scipy.load(fn)
			X.append(fft_features[:1000])
			y.append(label)
	return np.array(X), np.array(y)
def reconstruct_target(target_file,base_prefix,regul = None):
	"""
		Reconstruct the target in 'target_file' using constrained, 
		and optionally regularized, least square optimisation.
		
		arguments :
			target_file : file containing the target to fit
			base_prefix : prefix for the files of the base.
	"""
	
	vlist = read_vertex_list(base_prefix+'_vertices.dat')
	t = read_target(target_file,vlist)
	U = load(base_prefix+"_U.npy").astype('float')
	S = load(base_prefix+"_S.npy").astype('float')
	V = load(base_prefix+"_V.npy").astype('float')

	ntargets,dim = V.shape
	nvert = len(t)
	pt = dot(U.T,t.reshape(nvert*3,1))
	pbase = S[:dim].reshape(dim,1)*V.T
	A = param('A',value = matrix(pbase))
	b = param('b',value = matrix(pt))
	x = optvar('x',ntargets)

	if regul is None : prob = problem(minimize(norm2(A*x-b)),[x>=0.,x<=1.])
	else : prob = problem(minimize(norm2(A*x-b) + regul * norm1(x)),[x>=0.,x<=1.])
	
	prob.solve()
	
	targ_names_file = base_prefix+"_names.txt"
	with open(targ_names_file) as f :
		tnames = [line.strip() for line in f.readlines() ]
	tnames.sort()
	
	base,ext = os.path.splitext(target_file)
	bs_name = base+".bs"
	with open(bs_name,"w") as f :
		for tn,v in zip(tnames,x.value):
			if v >= 1e-3 : f.write("%s %0.3f\n"%(tn,v))
Example #48
def main( fname ):
    """Run on sample in fname"""

    lda = sc.load( fname )
    k, d, a0, O, X = lda['k'], lda['d'], lda['a0'], lda['O'], lda['data']
    X1, X2, X3 = X

    P, T = sample_moments( X1, X2, X3, k, a0 )

    O_ = recover_topics( P, T, k, a0 )
    O_ = closest_permuted_matrix( O.T, O_.T ).T

    print k, d, a0, norm( O - O_ )
 def get_train_data(n_pos = 31929, n_neg = 164863,k=12):        
     '''
     Merge positive and negative examples
     '''
     suff = str(k)        
     X_name = 'train_data_'+ suff +  '.npy'
     y_name = 'labels_'+ suff + '.npy'        
     if not(os.path.exists(X_name) and os.path.exists(y_name)):
         X_train_face,y_train_face  = Datasets.get_train_face_wider_data(k = k)
         #X_pos = X_train_face[y_train_face==1]
         X_pos = X_train_face
         X_aflw,y_train_face_aflw  = Datasets.get_aflw_face_data(k = k)
         X_pos = sp.vstack( [X_pos,X_aflw] )
         X_train_non_face,y_train_non_face =  Datasets.get_train_non_face_data(k = k)
         print('c1_pos:',len(X_pos))
         if len(X_train_face[y_train_face==0]) > 0:
             X_neg = sp.vstack( (X_train_face[y_train_face==0],X_train_non_face) )
         else:
             X_neg = X_train_non_face
         X_pos = shuffle(X_pos,random_state=42)
         X_neg = shuffle(X_neg,random_state=42)
         X_pos = X_pos[:n_pos]
         X_neg = X_neg[:n_neg]
         
         n_neg = len(X_neg)
         n_pos = len(X_pos)
         y_pos = sp.ones(n_pos,int)
         y_neg = sp.zeros(n_neg,int)
         X = sp.vstack((X_pos,X_neg))
         y = sp.hstack( (y_pos,y_neg) )
         X,y = shuffle(X,y,random_state=42)
         sp.save(X_name,X)
         sp.save(y_name,y)
     else:
         X = sp.load(X_name)
         y = sp.load(y_name)
     print("Done","Positive examples count, Negative exapmples count:",len(y[y==1]),len(y[y==0]))
     
Example #50
def play_movie(image_dir, fixation_file=None, fps=30):

    pygame.init()

    clock = pygame.time.Clock()
    flags = pygame.NOFRAME
    depth = 32
    surf = pygame.display.set_mode((640,480), flags, depth)

    im_base_name = "cam1_frame_"
    im_extension = ".bmp"

    if fixation_file is not None:
        fixations = sp.load(fixation_file)
        fixations[sp.isnan(fixations)] = -100
        fixations[abs(fixations) > 1000] = 1000
    else:
        fixations = []

    try:
        pygame.event.clear()
        pygame.event.set_allowed(None)
        pygame.event.set_allowed(pygame.KEYDOWN)
        for framenum in xrange(4000):
            im_name = "".join([im_base_name, str(framenum), im_extension])
            im_path = os.path.join(image_dir, im_name)
            im = cv2.imread(im_path)
            if len(fixations) != 0:
                if sp.floor(fixations[framenum, 1]) == 237:
                    continue
                cv2.circle(im, tuple(fixations[framenum]), 3, (255, 255, 255))
            im_buf = im.tostring()
            im = pygame.image.frombuffer(im_buf, (640,480), "RGB")
            surf.blit(im, (0,0))
            pygame.display.flip()
            print "Frame", framenum
            for event in pygame.event.get():
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_ESCAPE:
                        sys.exit(0)

            clock.tick(30)
    except KeyboardInterrupt:
        print "Quitting"
    except Exception as e:
        print "An exception!"
        print e
        raise
    finally:
        pygame.quit()
def compare_error_bounds( model_fname, log_fname, delta = 0.1 ):
    """Compare error bounds theoretical analysis"""
    gmm = GaussianMixtureModel.from_file( model_fname )
    k, d, M, w = gmm.k, gmm.d, gmm.means, gmm.weights

    P, T = exact_moments( M, w )

    lg = sc.load( log_fname )

    # TODO: Use concentration bounds on aerr_P12
    n_M, sk_M = lg["norm_M_2"], lg["s_k_M"], 
    e_P, e_T = lg["aerr_P_2"], lg["aerr_T"], 
    n_P, sk_P, n_T = lg["norm_Pe_2"], lg["s_k_P"], lg["norm_Te"]
    w_min = min(w)

    # TODO: Ah, not computing sigma2! 

    # alpha_P and \beta_P
    a_P = e_P/sk_P
    b_P = a_P/(1-a_P)

    e_Wb = 2/sqrt(sk_P) * b_P
    e_W = lg["aerr_W_2"]

    e_Twb = 1/sqrt(sk_M * (1-a_P)) * e_T + n_T/sk_M * (1 + 1/sqrt(1-a_P) + 1/(1-a_P)) * e_W
    e_Tw = lg["aerr_Tw"]

    e_Lb = e_Tw
    e_L = lg["aerr_lambda"]

    D_M = column_sep( M )
    D_Tw = delta/(sqrt(sc.e) * k**2 * (1+sqrt(2 * log(k/delta)))) * D_M
    e_vb = 4 * sqrt(2) * e_Tw / D_Tw
    e_v = lg["aerr_v_col"]

    e_Wtb = 2 * sqrt( n_P + e_P ) * b_P
    n_Wtb = sqrt( n_P + e_P )

    e_mub = e_Lb + (1+1/sqrt(w_min)) * n_Wtb * e_vb + e_Wtb
    e_mu = lg["aerr_M_col"]

    print "A\t\tbound\t\tactual"
    print "W\t\t%f\t\t%f" % (e_Wb, e_W)
    print "Tw\t\t%f\t\t%f" % (e_Twb, e_Tw)
    print "L\t\t%f\t\t%f" % (e_Lb, e_L)
    print "v\t\t%f\t\t%f" % (e_vb, e_v)
    print "mu\t\t%f\t\t%f" % (e_mub, e_mu)
    return [(e_W/e_Wb), (e_Tw/e_Twb), (e_L / e_Lb), (e_v/e_vb), (e_mu / e_mub),]
Example #52
def load(file, metafile=None):
    """Open a .npy file and load it into memory as an info_aray.

    Similar to the numpy.load function.  Does not support memory
    mapping (use open_memmap).

    Parameters
    ----------
    file: file handle or str
        .npy file or file name to read the array from.
    metafile: str
        File name from which the `info` attribute of the returned InfoArray
        will be read. Default is None, in which case it is
        assumed to be the file name associated with `file` with ".meta"
        appended. If the file does not exist, the info attribute is initialized
        to an empty dictionary.

    Returns
    -------
    iarray: InfoArray object
    """

    # Load the data from .npy format.
    array = sp.load(file)

    # Figure out what the filename for the meta data should be.
    if metafile is None:
        try:
            fname = file.name
        except AttributeError:
            fname = file
        metafile = fname + ".meta"

    # Read the meta data.
    if os.path.isfile(metafile):
        info_fid = open(metafile, 'r')
        try:
            infostring = info_fid.readline()
        finally:
            info_fid.close()
        info = safe_eval(infostring)
    else:
        info = {}

    # Construct the info array.
    array = info_header.InfoArray(array, info)

    return array
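
A short usage sketch for the loader above: the companion .meta file is a single line holding a Python dict literal, which safe_eval turns back into the info dictionary (the file name and info keys are illustrative, and load here is the function defined above):

import numpy as np

# Write the data file and a one-line metadata file that load() will pick up.
np.save('spectrum.npy', np.arange(10.0))
with open('spectrum.npy.meta', 'w') as f:
    f.write(repr({'axes': ('freq',), 'units': 'K'}))

iarray = load('spectrum.npy')   # InfoArray with iarray.info == {'axes': ('freq',), 'units': 'K'}
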
Example #53
    def load_state(self, file_name, autogrow=False):
        toload = sp.load(file_name)
        
        try:
            if toload.shape[0] != 9:
                print "Error loading state: Bad data!"
                return
                
            if autogrow and toload[0].shape[0] != self.A.shape[0]:
                newN = toload[0].shape[0] - 3
                print "Changing N to: %u" % newN
                self.grow_left(newN - self.N)
                
            if toload[0].shape != self.A.shape:
                print "Cannot load state: Dimension mismatch!"
                return
            
            self.A = toload[0]
            self.l[0] = toload[1]
            self.uni_l.r = toload[2]
            self.uni_l.K_left = toload[3]
            self.r[self.N] = toload[4]
            self.r[self.N + 1] = self.r[self.N]
            self.uni_r.l = toload[5]
            self.uni_r.K = toload[6]
            
            self.grown_left = toload[7][0, 0]
            self.grown_right = toload[7][0, 1]
            self.shrunk_left = toload[7][1, 0]
            self.shrunk_right = toload[7][1, 1]
            
            self.uni_l.A = self.A[0]
            self.uni_l.l = self.l[0]
            self.uni_l.l_before_CF = self.uni_l.l
            self.uni_l.r_before_CF = self.uni_l.r
            
            self.uni_r.A = self.A[self.N + 1]
            self.uni_r.r = self.r[self.N]
            self.uni_r.l_before_CF = self.uni_r.l
            self.uni_r.r_before_CF = self.uni_r.r

            print "loaded."

            return toload[8]
            
        except AttributeError:
            print "Error loading state: Bad data!"
            return
Example #54
def compare_error_bounds( model_fname, log_fname, delta = 0.1 ):
    """Compare error bounds theoretical analysis"""

    mvgmm = MultiViewGaussianMixtureModel.from_file( model_fname )
    k, d, M, w = mvgmm.k, mvgmm.d, mvgmm.means, mvgmm.weights
    M1, M2, M3 = M

    P12, P13, P123 = exact_moments( w, M1, M2, M3 )
    U1, _, U2 = svdk( P12, k )
    _, _, U3 = svdk( P13, k )
    U2, U3 = U2.T, U3.T

    lg = sc.load( log_fname )

    # TODO: Use concentration bounds on aerr_P12
    e_P12, e_P13, e_P123 = lg["aerr_P12_2"], lg["aerr_P13_2"], lg["aerr_P123"], 
    n_P12, n_P13, n_P123 = lg["norm_P12_2"], lg["norm_P13_2"], lg["norm_P123"], 

    P12_ = U1.T.dot( P12 ).dot( U2 )
    n_P12i = norm( inv(P12_) )
    K_P12 = condition_number( P12_ )

    P13_ = U1.T.dot( P13 ).dot( U3 )
    n_P13i = norm( inv(P13_) )

    e_P12ib = n_P12i * K_P12 / (n_P12 - e_P12 * K_P12) * e_P12
    e_B123b = e_P123 * n_P12i + n_P123 * e_P12ib
    e_B123 = lg["aerr_B123"]

    D_M3 = column_gap( U3.T.dot( M3 ), k )
    D_L = delta/(sqrt(sc.e) * k**2 * (1+sqrt(2 * log(k/delta)))) * D_M3
    n_R, K_R = lg["norm_R_2"], lg["K_R"]
    e_Lb = k**3 * K_R**2 * n_R / D_L * e_B123
    # TODO: Adjust because this is a bound on Frob. norm, not spec. norm
    e_Lb = e_Lb * k
    e_L = lg["aerr_L_2"]

    n_mu = max(norm(M3.T[i]) for i in xrange(k))
    e_mub = sqrt(k) * e_L + 2 * n_mu * n_P13i * e_P13/ (1 - n_P13i * e_P13) 
    e_mu = lg["aerr_M3_col"]

    print "A\t\tbound\t\tactual"
    print "B123\t\t%f\t\t%f" % (e_B123b, e_B123)
    print "L\t\t%f\t\t%f" % (e_Lb, e_L)
    print "M3\t\t%f\t\t%f" % (e_mub, e_mu)
    return [ e_B123/e_B123b, e_L/e_Lb, e_mu/e_mub,]
Example #55
 def __init__(self,fn):
     # data['n_x'] is the x polarization data for the nth antenna.
     # data['n_y'] is the y polarization data.  Each data element is a 1001
     # element array.  We want to coalesce the polarization pairs into
     # 2x1001 element arrays, because both polarizations have to be used
     # together, and we can concatenate all the antennas vertically and find
     # the optimal frequency.
     data = sp.load(fn)
     self.ants = []
     self.reversemap = {}
     # iterate over physical antennas
     for antsx,antnum in enumerate(set( 
             (int(i.split('_')[0]) for i in data.files) 
             )):
         ant = sp.array(( data["{}_x".format(antnum)], 
                          data["{}_y".format(antnum)] ))
         ant = sp.atleast_2d(ant)
         self.ants.append(ant)
         self.reversemap[antsx] = antnum
Example #56
    def load_state(self, file):
        """Loads the parameter tensors self.A from a file.

        The saved state must contain the right number of tensors with
        the correct shape corresponding to self.N and self.q.
        self.D will be recovered from the saved state.

        Parameters
        ----------
        file ; path or file
            The file to load the state from.
        """
        tmp_A = sp.load(file)

        self.D[0] = 1
        for n in xrange(self.N):
            self.D[n + 1] = tmp_A[n + 1].shape[2]
        self._init_arrays()
        self.A = tmp_A
def get_ls_and_Cls_from_observers(parameterfile,observerfile):
    print "This function has not been tested yet"
    f = open(parameterfile,'r')    
    parameters = cPickle.load(f)
    f.close()
#    parameters = sp.load(path + 'parameters.save')
    observers = sp.load(observerfile)



#    bin_number = 0
    bin_number = 2
#    bin_number = 11
    bn = bin_number
    for mind_bin, maxd_bin in zip(parameters.bindistances[bn:bn+1],parameters.bindistances[bn+1:bn+2]):
        d = sp.mean((mind_bin,maxd_bin))
        ls = observers[0].ls[bin_number]
        Cls = sp.array([observer.cls[bin_number] for observer in observers])
    #    cls = observers[0].cls[bin_number]
        bin_number = bin_number+1

    return ls, Cls
def getAcquaintances(A, clusters, person = 'Barack_Obama',
    outputfile = '../data/people_in_the_cluster.txt',
    t2i = '../data/title-ID_dict.pickle',
    i2t = '../data/ID-title_dict.pickle',
    pid2ind = '../data/person_id2ind_6800.pickle', 
    printRating = 0):
        
    t2i = scipy.load(t2i)
    pid2ind = scipy.load(pid2ind)

    if person in t2i:
        ix = t2i[person]
        if ix:
            print 'Wiki ID is', ix
        else:
            print 'Wiki ID is None!'
            return
    else:
        print 'Person isn\'t found in t2i!'
        return
    
    # get person's dense-dense id (in 15k array)
    ix = pid2ind[ix]
    
    if ix:
        print 'In dense matrix he/she is', ix
    else:
        print 'Person\'s id not found in t2i!'
        return
    
    # find the cluster that contains the person
    cluster = np.where(map(lambda x: ix in x, clusters))[0]
    ids = np.array(clusters[cluster[0]])
    
    if len(cluster):
        print person, 'is acquainted with', len(ids), 'people'
    else:
        print 'Person isn\'t found in any cluster!'
        return
    
    # create inverse index-to-person dictionary
    ind2pid = dict(zip(pid2ind.values(), pid2ind.keys()))
    
    # get ratings
    # ratings = A[ids,:].sum(axis=1)  # among all vertex
    ratings = A[ids,:][:,ids].sum(axis=1) # only inside cluster
    ratings = np.array(ratings).flatten()
    
    # sort people by rating
    ix = ratings.argsort()[::-1]
    ids = ids[ix]
    ratings = ratings[ix]
    
    i2t = scipy.load(i2t)
    ids = map(ind2pid.get, ids)
    
    # print all the people in this cluster
    if printRating:
        string = '\n'.join([i2t[i]+", "+str(r) for i, r in zip(ids, ratings)])
    else:
        string = '\n'.join(map(i2t.get, ids))
    
    # write to file
    f = open(outputfile, 'w')
    f.write(string)
    f.close()
Example #59
    ALvls = scipy.append(ALvls,scipy.array([0.241]))
    ALvls = scipy.sort(ALvls)

    AlphaLvls = []
    for a in ALvls:
        AlphaLvls.append(a)
    DataDict['AlphaLvls'] = AlphaLvls

    AllGenes = scipy.array([])
    for F in GWPValFiles:
        File            = os.path.join(GeneWisePValuePath,F)
        DecomprFile     = File[:-4]
        Trait           = re.sub('_','.',os.path.basename(DecomprFile)[:-4])
        DataDict[Trait] = {}
        os.system('lbzip2 -d -k -f '+File)
        Data                      = scipy.load(DecomprFile)
        PVals                     = Data[1,1:].astype(float)
        Genes                     = Data[0,1:]
        AllGenes                  = scipy.append(AllGenes,Genes)
        AllGenes                  = scipy.unique(AllGenes)
        NGenes                    = len(Genes)
        DataDict[Trait]['NGenes'] = NGenes
        os.remove(DecomprFile)
        for Alpha in AlphaLvls:
            BH    = statsmodels.stats.multitest.multipletests(pvals=PVals,
                                                              alpha=Alpha,
                                                              method='fdr_bh',
                                                              returnsorted=False)
            DataDict[Trait]['Alpha_'+str(Alpha)]     = Alpha
            DataDict[Trait]['AlphaBonf_'+str(Alpha)] = BH[3]
            BHPVals  = scipy.array(['BHp_value_alpha='+str(Alpha)])

    scipy.save(file='BackgroundGeneSet.npy',
               arr=BGSData)
    os.system('lbzip2 BackgroundGeneSet.csv')
    os.system('lbzip2 BackgroundGeneSet.npy')

if('-e' in sys.argv):
    os.system('lbzip2 -d BackgroundGeneSet.csv.bz2')
    fr        = open('BackgroundGeneSet.csv','r')
    BGSHeader = fr.readline().strip().split('|')
    fr.close()
    os.system('lbzip2 BackgroundGeneSet.csv')

#    os. system('lbzip2 -d BackgroundGeneSet.npy.bz2')
    BGSData = scipy.load('BackgroundGeneSet.npy')
#    os. system('lbzip2 BackgroundGeneSet.npy.bz2')
    print BGSHeader
    print len(BGSData),len(BGSData[0])
#   GO term enrichment
    GODBIndices       = scipy.where(BGSData[BGSHeader.index('GODB')]=='Gene Ontology')[0]
    GenesWithGOEntry  = scipy.unique(BGSData[BGSHeader.index('GeneEntrezID'),GODBIndices])
    NGenesWithGOEntry = len(GenesWithGOEntry)

    QueryGeneSymbols     = []
    fr                   = open(sys.argv[sys.argv.index('-e')+1],'r')
    for Line in fr:
        QueryGeneSymbols.append(Line.strip())
    fr.close()
    GeneSymbolArray,\
    Ind                  = scipy.unique(BGSData[BGSHeader.index('GeneSymbol(s)'),GODBIndices],return_index=True)