Example #1
 def setMs(self, nSensors=10):
     '''Creates an n-grid mesh across the surface for the 3D case '''
     
     self.nSen = nSensors*nSensors
     '''First find the appropriate 10 indexes within the PML & illumination region '''
     indx = np.round(np.linspace(self.npml+5,self.nx-self.npml-5, nSensors)).astype(int)-1;
     indx = np.unique(indx)
     # print (indx + 1)
     ''' make the exact X operator using strides '''
     xl,zl = np.meshgrid(indx+1,indx)
     Mx = sparse.dok_matrix((self.nSen,(self.nx+1)*self.ny*self.nz))
     
     for ix,loc in enumerate(zip(xl.flatten(),zl.flatten())):
         pts = loc[0]*self.ny*self.nz + self.div*self.nz + loc[1]
         Mx[ix,pts] = 1.0
     
     xl,zl = np.meshgrid(indx,indx)
     My = sparse.dok_matrix((self.nSen,self.nx*(self.ny+1)*self.nz))
     
     for ix,loc in enumerate(zip(xl.flatten(),zl.flatten())):
         pts = loc[0]*(self.ny+1)*self.nz + (self.div+1)*self.nz + loc[1]
         My[ix,pts] = 1.0
         
         
     '''make the exact Z operator using strides '''
     xl,zl = np.meshgrid(indx,indx+1)
     Mz = sparse.dok_matrix((self.nSen,self.nx*self.ny*(self.nz+1)))
     
     for ix,loc in enumerate(zip(xl.flatten(),zl.flatten())): 
         pts = loc[0]*self.ny*(self.nz+1) + self.div*(self.nz+1) + loc[1]
         Mz[ix,pts] = 1.0        
     
     ''' smush together in block diagonal format '''
     self.Ms = sparse.block_diag((Mx,My,Mz),'csr')
     self.nSen = 3*self.nSen
def build_sparse_matrix(list_of_dicts, vector_length, orient='columns', verbose=False):
    """
    Function for building sparse matrix from list of dicts
    :param list_of_dicts: list of dictionaries representing sparse vectors
    :param vector_length: number of values in dense representation of sparse vector
    :param orient: build matrix by rows or columns - default is columns
    :param verbose: if True, print build progress
    :return: sparse matrix
    """
    if orient == 'columns':
        columns = len(list_of_dicts)
        matrix = dok_matrix((vector_length, columns))
        for column, vector in enumerate(list_of_dicts):
            if verbose:
                print("Building matrix {:0.2%}".format(column / columns), end='\r')
            for term in vector.keys():
                matrix[int(term), column] = vector[term]
    elif orient == 'rows':
        rows = len(list_of_dicts)
        matrix = dok_matrix((rows, vector_length))
        for row, vector in enumerate(list_of_dicts):
            if verbose:
                print("Building matrix {:0.2%}".format(row / rows), end='\r')
            for term in vector.keys():
                matrix[row, term] = vector[term]
    else:
        raise ValueError('Orient must be either \'columns\' or \'rows\'')

    print("Matrix complete.                    ")
    return csc_matrix(matrix)
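A minimal usage sketch (the toy vectors below are hypothetical, not from the original source; the imports are the ones the helper itself relies on):

from scipy.sparse import dok_matrix, csc_matrix

vectors = [{0: 1.0, 3: 2.5}, {2: 4.0}]   # two sparse vectors of dense length 5
mat = build_sparse_matrix(vectors, vector_length=5, orient='columns')
print(mat.shape)   # (5, 2)
print(mat[3, 0])   # 2.5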
def get_Heisenberg_H(Sx,Sy,Sz,b,N):
    '''Build the Hamiltonian for the N-particle system.'''
    
    D = Sx.get_shape()[0]                   # Dimensions of the spin matrices.
    
    Sz_sum = dok_matrix((D**N,D**N))        # Contribution from the B field.
    Sx_int_sum = dok_matrix((D**N,D**N))    # Contribution from interactions.
    Sy_int_sum = dok_matrix((D**N,D**N))
    Sz_int_sum = dok_matrix((D**N,D**N))
    
    S_int_sum = [[Sx_int_sum,Sy_int_sum,Sz_int_sum],[Sx,Sy,Sz]]
    
    for k in range(N):
        # Compute the B-field contribution in the Hamiltonian.
        Sz_sum += get_full_matrix(Sz,k,N)
        
        # Compute the S-S interaction dot product in the Hamiltonian.
        for i in range(3):                  # For x, y and z orientations.
            i_k = get_full_matrix(S_int_sum[1][i],k,N)
            i_k_1 = get_full_matrix(S_int_sum[1][i],k+1,N)
            
            S_int_sum[0][i] += i_k*i_k_1
        S_dot_S_sum = Sx_int_sum + Sy_int_sum + Sz_int_sum
        
    # Interaction Hamiltonian.    
    H = -b/2 * Sz_sum - S_dot_S_sum
    
    return H
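The helper get_full_matrix used here (and in the other spin-chain snippets on this page) is not shown. A minimal sketch of one common construction, under the assumptions of 0-based site indices and a periodic chain so the k+1 == N access wraps around, could look like this:

from scipy.sparse import identity, kron

def get_full_matrix_sketch(S, k, N):
    # Embed the single-site operator S at site k of an N-site chain via
    # Kronecker products with identity matrices on every other site.
    D = S.get_shape()[0]
    k = k % N                      # assumed periodic wrap-around
    op = identity(1, format='csr')
    for site in range(N):
        op = kron(op, S if site == k else identity(D), format='csr')
    return op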
    def read_given_train_test(self, train_file, test_file):
        """
        read given data set
        """
        users, items = set(), set()
        ratings = list()
        with codecs.open(train_file, mode="r", encoding="utf-8") as read_file:
            for line in read_file:
                user_item_rating = re.split('\t|,|::', line.strip())
                user_id = int(user_item_rating[0])
                item_id = int(user_item_rating[1])
                rating = int(user_item_rating[2])
                users.add(user_id)
                items.add(item_id)
                ratings.append((user_id, item_id, rating))

        # Convert
        user_num, item_num = len(users), len(items)
        users_dict = {user_id: index for index, user_id in enumerate(list(users))}
        items_dict = {item_id: index for index, item_id in enumerate(list(items))}
        train_matrix = dok_matrix((user_num, item_num))
        test_matrix = dok_matrix((user_num, item_num))
        for user_id, item_id, rating in ratings:
            train_matrix[users_dict[user_id], items_dict[item_id]] = rating

        with codecs.open(test_file, mode='r', encoding='utf-8') as read_file:
            for line in read_file:
                user_item_rating = re.split('\t|,|::', line.strip())
                user_id = int(user_item_rating[0])
                item_id = int(user_item_rating[1])
                rating = int(user_item_rating[2])
                test_matrix[users_dict[user_id], items_dict[item_id]] = rating
        return train_matrix, test_matrix
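A note on the input layout this reader assumes (inferred from the re.split('\t|,|::', ...) call, not from an external spec): each line carries user_id, item_id, rating separated by a tab, a comma, or '::', e.g. "1,42,5" or "1::42::5". Only ids that appear in the training file get matrix indices, so a test line with an unseen user or item would raise a KeyError.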
def write_cv_data(K, data_dir, idx, W, L, T, D, N, phi, alpha, beta, chains):
    B = generate_lda(T, W, D, N, phi, alpha)
    # split cv data
    B_sparse = csr_matrix(B)
    Bs = [dok_matrix((D, W), dtype=np.float32) for k in range(K)]
    test_counts = [dok_matrix((D, W), dtype=np.float32) for k in range(K)]
    for d in range(B_sparse.shape[0]):
        crow = B_sparse[d,:].tocoo()
        list_of_tokens = []
        for term_idx,count in itertools.izip(crow.col, crow.data):
            list_of_tokens += [term_idx]*count
        list_of_tokens = list(np.random.permutation(np.array(list_of_tokens)))
        kf = KFold(len(list_of_tokens), n_folds=K)
        for k,(train, test) in enumerate(kf):
            l = [list_of_tokens[i] for i in train]
            dict_of_counts = collections.Counter(l)
            for w,count in dict_of_counts.iteritems():
                Bs[k][d,w] = count
            l = [list_of_tokens[i] for i in test]
            dict_of_counts = collections.Counter(l)
            for w,count in dict_of_counts.iteritems():
                test_counts[k][d,w] = count
    Bs = [csr_matrix(i) for i in Bs]
    test_counts = [csr_matrix(i) for i in test_counts]
    for i,counts in enumerate(test_counts):
        pickle.dump(counts,
            open(os.path.join(data_dir, 'counts_{}.pkl'.format(i)), 'w'))
    write_pb_cv(data_dir, idx, W, T, D, alpha, beta, Bs, write_params=False)
    write_stan_cv(data_dir, idx, W, T, D, alpha, beta, Bs, chains=chains, write_params=False)
    write_prism_cv(data_dir, idx, W, T, D, alpha, beta, Bs, write_params=False)
    write_txt_cv(data_dir, idx, Bs, T, alpha, beta, write_params=False)
    def getMatrix(self, date_time0, date_time1, filterBy=(), groupBy=''):
        looks = [l for l in self.looks if date_time0 <= l.date_time < date_time1]
        for l in looks:
            if l.interaction ==1 and l.observed._id in ['SIN', 'MARCADOR', 'FOTO']:
                l.interaction = 0
        if len(filterBy) > 0:
            attribute = filterBy[0]
            values = filterBy[1]
            looks = [l for l in looks if getattr(l, attribute) in values]

        m = sp.dok_matrix((37,38))
        t = sp.dok_matrix((37,38))
        d = sp.dok_matrix((37,38))
        studentIds = [p._id for p in self.people if self.id2int(p._id) < 37]
        for l in looks:
            i = min([ self.id2int(l.observer._id), 37 ])
            if l.interaction == 1:
                j = min([ self.id2int(l.observed._id), 37 ])
                m[(i,j)] += 1
            presentStudents = [self.id2int(sId) for sId in studentIds if self.isPresent(sId, l.date_time)]
            presentStudents.append(37)
            for k in presentStudents:
                t[(i,k)] += 1
            if l.interaction == 1 and not j in presentStudents:
                print('error: ' + l.observer._id + ' looks at ' + l.observed._id + ' but is not present at ' + str(l.date_time))
        #d = self.divide(m, t)
        d = (m.todense()/t.todense())*100
        
        if groupBy == 'gender':
            males = [self.id2int(s._id) for s in self.people if 0 < self.id2int(s._id) < 37 and s.gender == '1']
            females = [self.id2int(s._id) for s in self.people if self.id2int(s._id) < 37 and s.gender == '2']
            m = np.c_[m[:,0].todense(), m[:,males].sum(1), m[:,females].sum(1), m[:,37].todense()]
            t = np.c_[t[:,0].todense(), t[:,37].todense(), t[:,37].todense(), t[:,37].todense()]
            d = (m/t)*100
        return d, m, t
def test_load_branches_medium(case14):
    demand_dict, root, _ = load_buses(case14)
    e2i, _, _ = renumber_buses(demand_dict, root)
    n = len(e2i)
    Ghat = dok_matrix((n, n))
    Bhat = dok_matrix((n, n))
    s_dict = {(0, 1): (499.9131600798035-1526.3086523179554j),
              (0, 4): (102.58974549701888-423.4983682334831j),
              (1, 2): (113.50191923073959-478.1863151757718j),
              (3, 4): (684.0980661495671-2157.855398169159j),
              (3, 6): -478.1943381790359j,
              (4, 5): -396.79390524561546j,
              (5, 10): (195.50285631772607-409.4074344240442j),
              (5, 11): (152.59674404509738-317.5963965029401j),
              (5, 12): (309.89274038379875-610.2755448193116j),
              (6, 7): -567.6979846721543j,
              (6, 8): -909.0082719752751j,
              (8, 9): (390.2049552447428-1036.5394127060915j),
              (8, 13): (142.4005487019931-302.90504569306034j)}
    for (i, j), y in s_dict.items():
        Ghat[i, j] = y.real
        Ghat[j, i] = y.real
        Bhat[i, j] = y.imag
        Bhat[j, i] = y.imag
    branch_list_hat = list(sorted(s_dict.keys()))
    branch_map_hat = {}
    for i, (fbus, tbus) in enumerate(branch_list_hat):
        branch_map_hat[(fbus, tbus)] = i
        branch_map_hat[(tbus, fbus)] = i
    G, B, branch_list, branch_map = load_branches(case14, e2i)
    assert_almost_equal(G.todense(), Ghat.todense())
    assert_almost_equal(B.todense(), Bhat.todense())
    assert branch_list == branch_list_hat
    assert branch_map == branch_map_hat
    def __init__(self, nFeat, nAction, epLen, epsilon=0.0, sigma=1.0, lam=1.0, maxHist=5000):
        self.nFeat = nFeat
        self.nAction = nAction
        self.epLen = epLen
        self.epsilon = epsilon
        self.sigma = sigma
        self.maxHist = maxHist
        self.isRLSVI = (
            epsilon == 0.0
        )  # sample from belief only if epsilon is 0. Setting this here allows epsilon to be changed later (to stop LSVI from exploring further)

        # Make the computation structures
        self.covs = []
        self.thetaMeans = []
        self.thetaSamps = []
        self.memory = []
        for i in range(epLen + 1):
            self.covs.append(sp.identity(nFeat) / float(lam))
            self.thetaMeans.append(sp.dok_matrix((nFeat, 1)))
            self.thetaSamps.append(sp.dok_matrix((nFeat, 1)))
            self.memory.append(
                {
                    "oldFeat": sp.dok_matrix((maxHist, nFeat)),
                    "rewards": sp.dok_matrix((maxHist, 1)),
                    "newFeat": {j: sp.dok_matrix((nAction, nFeat)) for j in range(maxHist)},
                }
            )
Example #9
def get_ising_XY_H(Sx,Sy,Sz,b,N):
    '''
    Build the Hamiltonian for the N-particle system using the XY Ising model.
    '''
    D = Sx.get_shape()[0]
    Sx_sum = dok_matrix((D**N,D**N))
    Sy_sum = dok_matrix((D**N,D**N))
    Sz_sum = dok_matrix((D**N,D**N))
    Sz_k_sum = dok_matrix((D**N,D**N))

    for k in range(N):
        Sz_sum += get_full_matrix(Sz,k,N)
        
        if k == 0:
            Sx_k = get_full_matrix(Sx,k,N)
        else:
            Sx_k = Sx_k_1.copy()
        Sx_k_1 = get_full_matrix(Sx,k+1,N)
        Sx_sum += Sx_k.dot(Sx_k_1)
        del Sx_k
        
        if k == 0:
            Sy_k = get_full_matrix(Sy,k,N)
        else:
            Sy_k = Sy_k_1.copy()
        Sy_k_1 = get_full_matrix(Sy,k+1,N)
        Sy_sum += Sy_k.dot(Sy_k_1)
        del Sy_k
    del Sx_k_1,Sy_k_1
        
    H = -b/2 * Sz_sum - (Sx_sum + Sy_sum)
    return H 
Example #10
 def __init__(self, programEntities, sim=ssd.correlation):
   cleaner = DataCleaner()
   nusers = len(programEntities.userIndex.keys())
   fin = open("../Data/users.csv", 'r')
   colnames = fin.readline().strip().split(",")
   self.userMatrix = ss.dok_matrix((nusers, len(colnames) - 1))
   for line in fin:
     cols = line.strip().split(",")
     # consider the user only if he exists in train.csv
      if cols[0] in programEntities.userIndex:
       i = programEntities.userIndex[cols[0]]
       self.userMatrix[i, 0] = cleaner.getLocaleId(cols[1])
       self.userMatrix[i, 1] = cleaner.getBirthYearInt(cols[2])
       self.userMatrix[i, 2] = cleaner.getGenderId(cols[3])
       self.userMatrix[i, 3] = cleaner.getJoinedYearMonth(cols[4])
       self.userMatrix[i, 4] = cleaner.getCountryId(cols[5])
       self.userMatrix[i, 5] = cleaner.getTimezoneInt(cols[6])
   fin.close()
   # normalize the user matrix
   self.userMatrix = normalize(self.userMatrix, norm="l1", axis=0, copy=False)
   sio.mmwrite("../Models/US_userMatrix", self.userMatrix)
   # calculate the user similarity matrix and save it for later
   self.userSimMatrix = ss.dok_matrix((nusers, nusers))
   for i in range(0, nusers):
     self.userSimMatrix[i, i] = 1.0
   for u1, u2 in programEntities.uniqueUserPairs:
     i = programEntities.userIndex[u1]
     j = programEntities.userIndex[u2]
      if (i, j) not in self.userSimMatrix:
       usim = sim(self.userMatrix.getrow(i).todense(),
         self.userMatrix.getrow(j).todense())
       self.userSimMatrix[i, j] = usim
       self.userSimMatrix[j, i] = usim
   sio.mmwrite("../Models/US_userSimMatrix", self.userSimMatrix)
def Pi(terms,tfidf,w=None,eps=0.15,max_iter=100,tol=1e-08):
    """
    terms is the list of words
    tfidf is a list of dicts mapping each word to its tf-idf value
    """
    # build the dok_matrix
    n = len(terms) # number of distinct words
    m = len(tfidf) # number of documents
    # document-to-word transition matrix
    A = dok_matrix((m,n))
    for i,d in enumerate(tfidf):
        s = sum(d.values()) # sum of tfidf for each document
        for k,v in d.items():
            j = terms.index(k)
            A[i,j] = v / s
    # word-to-document transition matrix
    B = dok_matrix((n,m))
    s = sum(A) # sum of col
    stop = terms.index(w) if w else -1 # word to exclude
    for (i,j),v in A.items():
        if j == stop: continue
        B[j,i] = v / s[0,j]
    # document-to-document transition matrix
    C = A * B
    # find the stationary state of the random walk
    x = one = SP.identity(m)
    for i in range(max_iter):
        x_ = eps * one + (1.0 - eps) * x * C
        if LA.norm(x_.todense() - x.todense()) < tol:
            break
        x = x_
    return x_
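A hypothetical toy call (assuming the module-level imports Pi relies on are in place: dok_matrix from scipy.sparse, SP for scipy.sparse, and LA for numpy.linalg):

terms = ['apple', 'banana', 'cherry']
tfidf = [{'apple': 1.0, 'banana': 0.5},
         {'banana': 1.0},
         {'apple': 0.2, 'cherry': 0.8}]
x = Pi(terms, tfidf)
print(x.todense())   # m x m document-to-document matrix near the stationary state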
Example #12
def read_counts_matrix(counts_path):
    """
    Reads the counts into a sparse matrix (CSR) from the count-word-context textual format.
    """
    words = load_count_vocabulary(counts_path + '.words.vocab')
    contexts = load_count_vocabulary(counts_path + '.contexts.vocab')
    words = list(words.keys())
    contexts = list(contexts.keys())
    iw = sorted(words)
    ic = sorted(contexts)
    wi = dict([(w, i) for i, w in enumerate(iw)])
    ci = dict([(c, i) for i, c in enumerate(ic)])
    
    counts = csr_matrix((len(wi), len(ci)), dtype=np.float32)
    tmp_counts = dok_matrix((len(wi), len(ci)), dtype=np.float32)
    update_threshold = 100000
    i = 0
    with open(counts_path) as f:
        for line in f:
            count, word, context = line.strip().split()
            if word in wi and context in ci:
                tmp_counts[wi[word], ci[context]] = int(count)
            i += 1
            if i == update_threshold:
                counts = counts + tmp_counts.tocsr()
                tmp_counts = dok_matrix((len(wi), len(ci)), dtype=np.float32)
                i = 0
    counts = counts + tmp_counts.tocsr()
    
    return counts, iw, ic
Example #13
def out_degree_fraction(cover, weights=None, allow_nan = False):
    '''
    Out Degree Fraction (ODF) of a node in a cluster is the ratio between its number of external (boundary) edges
    and its total (weighted) degree.
    '''
    w_attr, remove = __get_weight_attr(cover.graph, 'out_degree_fraction', weights)
    mode = "nan" if allow_nan else 0

    #do this outside the loop because it is computationally expensive
    membership = cover.membership
    external_edges = cover.external_edges()
    degree_per_node = cover.graph.strength(weights=w_attr)
    # Intialize return value
    rv = dok_matrix((cover.graph.vcount(), len(cover))) # Rows = Vertex, cols = Cover
    for i in range(len(cover)):
        ext_edge_per_node = dok_matrix((cover.graph.vcount(), 1))

        for edge in external_edges[i]:
            node_index = edge.source if i in membership[edge.source] else edge.target
            ext_edge_per_node[node_index, 0] += 1.0 if weights is None else edge[w_attr]

        for (node, always_zero), ext_edges_for_this_node in ext_edge_per_node.items():
            rv[node, i] += ext_edges_for_this_node/float(degree_per_node[node]) if degree_per_node[node] != 0 else float(mode)

    __remove_weight_attr(cover.graph, w_attr, remove)
    return rv
def get_exp_val(S,psi):
    '''
    Find the expected values for an observable with a given state.
    S must be a sparse matrix.
    '''
    psi = np.matrix(psi)
    exp_value = dok_matrix(psi.conj()).dot(S.dot(dok_matrix(psi.T)))
    return exp_value[0,0]
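A minimal check with a hypothetical operator and state (assuming dok_matrix and numpy are imported, as get_exp_val requires): Sz for a single spin-1/2 and the "up" state, whose expectation value should be +0.5.

import numpy as np
from scipy.sparse import dok_matrix

Sz = dok_matrix(np.array([[0.5, 0.0], [0.0, -0.5]]))
psi = np.array([1.0, 0.0])
print(get_exp_val(Sz, psi))   # 0.5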
Example #15
    def evaluate(self,testdataset,pipp_radius=None,pipp_confid=[0.95]):
        """evaluate the instances and return a list of probability intervals
        with the given parameters
        
        :param pipp_radius: overcome default radius built during learning
        :type pipp_radius: float
        :param pipp_confid: set of confidence values used to predict rankings
        :type pipp_confid: list of floats
        :returns: for each value of pipp_confid, the returned voting scores
        :rtype: lists of :class:`~classifip.representations.voting.Scores`
        """
        if pipp_radius != None:
            self.radius=pipp_radius
        dataset=np.array(testdataset).astype(float)
        answers=[]

        if self.normal[0] == True:
            dataset=(dataset-self.normal[2])/self.normal[1]
        
        #build matrix of majority opinions
        majority=dok_matrix((len(self.labels),len(self.labels)))
        for i in self.truerankings:
            majority=majority+i
        for k in range(len(self.labels)):
            for l in list(range(k))+list(range(k+1,len(self.labels))):
                if majority[k,l] > majority[l,k]:
                    majority[k,l]=1.
                    majority[l,k]=0.
                elif majority[k,l] < majority[l,k]:
                    majority[l,k]=1.
                    majority[k,l]=0.
                else:
                    majority[l,k]=1.
                    majority[k,l]=1.
        
        for i in dataset:
            #add every neighbours in the given radius
            result=dok_matrix((len(self.labels),len(self.labels)))
            if self.tree.query_ball_point(i,self.radius) !=[]:
                for ind in self.tree.query_ball_point(i,self.radius):
                    result=result+self.truerankings[ind]
            #if no neighbour in radius, take the closest one
            else:
                result=result+self.truerankings[self.tree.query(i)[1]]
            #compute the final scores from the sample matrix for each conf values
            score_val=np.zeros((len(self.labels),2))
            for k in range(len(self.labels)):
                for l in list(range(k))+list(range(k+1,len(self.labels))):
                    #if no samples for a given comparison, simply use majority
                    if result[k,l]+result[l,k] > 0.:
                        score_val[k,:]+=get_binomial_int(result[k,l]
                                    +result[l,k], result[k,l],pipp_confid)
                    else:
                        score_val[k,:]+=get_binomial_int(majority[k,l]
                                    +majority[l,k], majority[k,l],pipp_confid)                            
            answers.append(Scores(score_val))
        
        return answers
Example #16
 def test_increment_edge_ages(self):
     self.soinn.adjacent_mat[0, 1:3] = 1
     self.soinn.adjacent_mat[1:3, 0] = 1
     self.soinn._Soinn__increment_edge_ages(0)
     expected = dok_matrix([[0, 2, 2, 0], [2, 0, 0, 0], [2, 0, 0, 0], [0, 0, 0, 0]])
     np.testing.assert_array_equal(self.soinn.adjacent_mat.toarray(), expected.toarray())
     self.soinn._Soinn__increment_edge_ages(1)
     expected = dok_matrix([[0, 3, 2, 0], [3, 0, 0, 0], [2, 0, 0, 0], [0, 0, 0, 0]])
     np.testing.assert_array_equal(self.soinn.adjacent_mat.toarray(), expected.toarray())
def criaXY(dictionary,total,y,standard):   
    #print "Creating X and Y"
    #computes X and Y for the words found in the review (dictionary); total indexes the words in the matrix X (words present in dictionary but not in total are ignored); y is the profit of each movie
    #print "Creating the structure"
    X=sparse.dok_matrix((len(dictionary),len(total)),dtype=np.int32)
    #pdb.set_trace()    
    #print "matrix created"
    Y=sparse.dok_matrix((len(y),1))
    maximo=0
    for indice,movie in enumerate(dictionary):
        #print "Movie:", indice,movie
        for palavra in dictionary[movie]:
                  
            '''            
            print movie
            print "indice:",indice, "\n"
            print "palavra",palavra, "\n"
            print "toltal.index",total.index(palavra),"\n"
            
            print dictionary[movie][palavra], "\n"
            '''
            if palavra not in total:
                pass
            else:
                X[indice,total[palavra]]=int(dictionary[movie][palavra])
        
        Y[indice,0]=float(y[movie])
        if np.abs(Y[indice,0])>maximo:
            maximo=np.abs(Y[indice,0])
        X[indice,total["_____"]]=1
        #print "Processing finished", indice,movie
        #print X
        #print total
    if standard==True:
        from sklearn import preprocessing


        #Y_test = preprocessing.scale(Y.todense())
        mediaY=sum(Y.toarray())/Y.shape[0]
        Y=Y-mediaY[0]*np.ones_like(Y.todense())
        Y=sparse.csr_matrix(Y)
        
        #std=np.std(Y.toarray())
        #Y=Y/std
        #Y=sparse.dok_matrix(Y)
        #stdY=1
        #X = as_float_array(X, copy)
        #X_mean = sparse.csr_matrix(X,dtype="float").mean(axis=0)
        #X_mean=np.array(X_mean)[0]
        #X=sparse.dok_matrix(X,dtype="float")
        #for indexe in xrange(len(X_mean)):
            #for indexe_linha in xrange(X.shape[0]):
                #X[indexe_linha,indexe]-=X_mean[indexe]
        return X.tocsr(),Y.tocsr() #,mediaY[0] #,mediaY,stdY,X_mean #X and Y are sparse matrices
    else:
         return X.tocsr(),Y.tocsr() #X and Y are sparse matrices
Example #19
    def initlize(self):
        #process syn0 with prod2idx
        f = open(self.prod_vector, "r")
        line = f.readline()
        vocab_size, self.vector_size = map(int, line.split())
        self.syn0 = np.zeros((vocab_size, self.vector_size), dtype=float)

        def add_prod(prod, weights):
            if prod not in self.prod2idx:
                #process word2idx & idx2word
                self.prod2idx[prod] = len(self.idx2prod)
                self.idx2prod.append(prod)
                #process syn0
                self.syn0[self.prod2idx[prod]] = weights

        for line_no, line in enumerate(f):
            parts = line.split()
            if len(parts) != self.vector_size+1:
                raise ValueError("invalid vector on line %s (is this really the text format?)" % (line_no))
            prod, weights = parts[0], list(map(float, parts[1:]))
            add_prod(prod, weights)

        #process user2idx
        with open(self.data_path, "r") as ins:
            for line in ins:
                obj = json.loads(line)
                user_id = obj["user_id"]
                if user_id not in self.user2idx:
                    self.user2idx[user_id] = len(self.idx2user)
                    self.idx2user.append(user_id)

        #process rating matrix
        self.train_score_matrix = dok_matrix((len(self.user2idx), len(self.prod2idx)), dtype=np.float64)
        with open(self.train_path, "r") as ins:
            for line in ins:
                obj = json.loads(line)
                user_id = obj["user_id"]
                review_id = obj["review_id"]
                business_id = obj["business_id"]
                stars = obj["stars"]
                user_idx = self.user2idx[user_id]
                prod_idx = self.prod2idx[business_id]
                self.train_score_matrix[user_idx, prod_idx] = stars

        self.test_score_matrix = dok_matrix((len(self.user2idx), len(self.prod2idx)), dtype=np.float64)
        with open(self.test_path, "r") as ins:
            for line in ins:
                obj = json.loads(line)
                user_id = obj["user_id"]
                review_id = obj["review_id"]
                business_id = obj["business_id"]
                stars = obj["stars"]
                user_idx = self.user2idx[user_id]
                prod_idx = self.prod2idx[business_id]

                self.test_score_matrix[user_idx, prod_idx] = stars
Example #20
def build_U_matrices(G, B):
    n = G.shape[0]
    Ureal = dok_matrix((n-1, n))
    Ureac = dok_matrix((n-1, n))
    S2 = 2**.5
    for i in range(1, n):
        Ureal[i-1, i] = S2*G[i, :].sum()
        Ureac[i-1, i] = -S2*B[i, :].sum()
    return Ureal, Ureac
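A hypothetical usage sketch with tiny symmetric branch conductance/susceptance matrices (values are illustrative only; dok_matrix is assumed to be imported from scipy.sparse, as the helper requires):

import numpy as np
from scipy.sparse import dok_matrix

G = dok_matrix(np.array([[0.0, 5.0, 2.0],
                         [5.0, 0.0, 3.0],
                         [2.0, 3.0, 0.0]]))
B = dok_matrix(np.array([[  0.0, -15.0,  -8.0],
                         [-15.0,   0.0,  -9.0],
                         [ -8.0,  -9.0,   0.0]]))
Ureal, Ureac = build_U_matrices(G, B)
print(Ureal.toarray())   # rows: buses 2..n, cols: all buses
print(Ureac.toarray())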
Example #21
def SynthePsize(params, trueY, h):
	result = None
	if h == 0:
		result = sparse.dok_matrix((params.totalLength, 1))
		result[trueY + 1, 0] = params.syntheticParams.strength
	else:
		result = sparse.dok_matrix((numpy.random.randn(params.totalLength, 1)))

	result[0,0] = 1 #still have bias - might be useful, and at worst will do nothing
	return result
Example #22
 def __init__(self, n0, n1, n2):
     self.n0 = n0
     self.n1 = n1
     self.n2 = n2
     self.m0 = n1 * n2
     self.m1 = n2
     self.x_pos = np.zeros(n0 * n1 * n2)
     self.y_pos = np.zeros(n0 * n1 * n2)
     self.k = sparse.dok_matrix((n0 * n1 * n2, n0 * n1 * n2))
     self.rest_lengths = sparse.dok_matrix((n0 * n1 * n2, n0 * n1 * n2))
Example #23
    def test_shape_compatibility(self):
        use_solver(useUmfpack=True)
        A = csc_matrix([[1., 0], [0, 2]])
        bs = [
            [1, 6],
            array([1, 6]),
            [[1], [6]],
            array([[1], [6]]),
            csc_matrix([[1], [6]]),
            csr_matrix([[1], [6]]),
            dok_matrix([[1], [6]]),
            bsr_matrix([[1], [6]]),
            array([[1., 2., 3.], [6., 8., 10.]]),
            csc_matrix([[1., 2., 3.], [6., 8., 10.]]),
            csr_matrix([[1., 2., 3.], [6., 8., 10.]]),
            dok_matrix([[1., 2., 3.], [6., 8., 10.]]),
            bsr_matrix([[1., 2., 3.], [6., 8., 10.]]),
            ]

        for b in bs:
            x = np.linalg.solve(A.toarray(), toarray(b))
            for spmattype in [csc_matrix, csr_matrix, dok_matrix, lil_matrix]:
                x1 = spsolve(spmattype(A), b, use_umfpack=True)
                x2 = spsolve(spmattype(A), b, use_umfpack=False)

                # check solution
                if x.ndim == 2 and x.shape[1] == 1:
                    # interprets also these as "vectors"
                    x = x.ravel()

                assert_array_almost_equal(toarray(x1), x, err_msg=repr((b, spmattype, 1)))
                assert_array_almost_equal(toarray(x2), x, err_msg=repr((b, spmattype, 2)))

                # dense vs. sparse output  ("vectors" are always dense)
                if isspmatrix(b) and x.ndim > 1:
                    assert_(isspmatrix(x1), repr((b, spmattype, 1)))
                    assert_(isspmatrix(x2), repr((b, spmattype, 2)))
                else:
                    assert_(isinstance(x1, np.ndarray), repr((b, spmattype, 1)))
                    assert_(isinstance(x2, np.ndarray), repr((b, spmattype, 2)))

                # check output shape
                if x.ndim == 1:
                    # "vector"
                    assert_equal(x1.shape, (A.shape[1],))
                    assert_equal(x2.shape, (A.shape[1],))
                else:
                    # "matrix"
                    assert_equal(x1.shape, x.shape)
                    assert_equal(x2.shape, x.shape)

        A = csc_matrix((3, 3))
        b = csc_matrix((1, 3))
        assert_raises(ValueError, spsolve, A, b)
	def __init__(self, inputfile, userSize, movieSize):
		self.__similarityMatrix = None
		self.__sortedIndexMatrix = None
		self.__inputfile = inputfile
		self.__userSize = userSize
		self.__movieSize = movieSize
		self.__rating_avg = 0.0
		self.__userMovieMatrix = dok_matrix((userSize+1,movieSize+1))
		self.__userMovieOrigin = dok_matrix((userSize+1,movieSize+1))
		self.__userMovieBinary = dok_matrix((userSize+1,movieSize+1))
		self.__resultMatrix = None
def create_lp_matrices(a, min_reviewers_per_paper, max_reviewers_per_paper,
                       min_papers_per_reviewer, max_papers_per_reviewer):
    """This function creates the matrices suitable for running Camillo J. Taylor algorithm
        a: affinity matrix
    """
    npapers = a.shape[0]
    nreviewers = a.shape[1]
    nedges = len(a.nonzero()[0])

    i, j = a.nonzero()
    v = a[i, j]

    # reviewers per paper and papers per reviewer
    ne = sparse.dok_matrix((npapers+nreviewers, nedges), dtype=np.float64)
    ne[i, range(nedges)] = 1
    ne[j+npapers, range(nedges)] = 1
    d = np.zeros((1, npapers + nreviewers))
    d[0, 0:npapers] = max_reviewers_per_paper
    d[0, npapers:] = max_papers_per_reviewer

    # at least reviewers_per_paper
    ne_atleast1_rev_per_paper = sparse.dok_matrix((npapers, nedges), dtype=int)
    ne_atleast1_rev_per_paper[i, range(nedges)] = -1
    d_atleast1_rev_per_paper = -np.ones((1, npapers))*min_reviewers_per_paper

    # at least papers_per_reviewer
    ne_atleast1_paper_per_rev = sparse.dok_matrix((nreviewers, nedges), dtype=int)
    ne_atleast1_paper_per_rev[j, range(nedges)] = -1
    d_atleast1_paper_per_rev = -np.ones((1, nreviewers))*min_papers_per_reviewer

    # append the other constraints where x >= 0 and x <= 1
    # x <= 1
    ne0 = sparse.dok_matrix((nedges, nedges), dtype=int)
    ne0[range(nedges), range(nedges)] = 1
    d0 = np.ones((nedges, 1))

    # -x <= 0 => x >= 0
    ne1 = sparse.dok_matrix((nedges, nedges), dtype=int)
    ne1[range(nedges), range(nedges)] = -1
    d1 = np.zeros((nedges, 1))

    final_ne = sparse.vstack([ne,
                              ne_atleast1_rev_per_paper,
                              ne_atleast1_paper_per_rev,
                              ne0,
                              ne1])

    final_d = np.vstack((d.T,
                         d_atleast1_rev_per_paper.T,
                         d_atleast1_paper_per_rev.T,
                         d0,
                         d1))

    return v, final_ne, final_d
def get_exp_values(b):
    '''Find the expectation value of the spin for a given b.'''
    H = get_tran_ising_H(Sx,Sz,b,N)
    E,V = eigsh(H,k=4,which='LM')
    E,V = sort_eigs(E,V)
    S__k = get_full_matrix(Sx,1,N)
    
    # Needed to avoid TypeError when converting to dok_matrix.
    psi = np.matrix(V[:,0])
    exp_value = dok_matrix(psi.conj()).dot(S__k.dot(dok_matrix(psi.T)))
    return exp_value[0,0]
Example #27
 def bond_life(self):
     """When do bonds first form and are last seen"""
     bstart = sparse.dok_matrix((self.nb_trajs, self.nb_trajs), dtype=int)
     blast = sparse.dok_matrix((self.nb_trajs, self.nb_trajs), dtype=int)
     for t,name in self.enum(ext='bonds'):
         bonds = np.sort(self.p2tr(t)[np.loadtxt(name, int)], 1)
         for a,b in bonds:
             blast[a,b] = t
             if (a,b) not in bstart:
                 bstart[a,b] = t
     return bstart, blast
Example #28
 def setMd(self, xrng, yrng, zrng):
     '''Tell me the xrange, yrange, and zrange and I'll
     1) specify nRx,nRy, and nRz
     2) produce a matrix that achieves a 1:1 sampling, self.Md '''
     
     '''set the right dimensions'''
     self.nRx = xrng[1]-xrng[0]
     self.nRy = yrng[1]-yrng[0]
     self.nRz = zrng[1]-zrng[0]
     
     nR = self.nRx*self.nRy*self.nRz
     ''' ok have to use spans:
     loc = i*J*K + j*K + k for row-major ordering '''
     ''' populate the locations in the X grid'''
     #sX = sparse.dok_matrix((self.nx+1,self.ny,self.nz),dtype='bool')
     #sX[xrng[0]+1:xrng[1]+1,yrng[0]:yrng[1],zrng[0]:zrng[1]] = True
     ''' make it an operator '''
     ''' nested for should give reshape-able vectors '''
     cnt = 0
     Mx = sparse.dok_matrix((nR,(self.nx+1)*self.ny*self.nz))
     for x in range(xrng[0]+1,xrng[1]+1):
         for y in range(yrng[0],yrng[1]):
             for z in range(zrng[0],zrng[1]):
                 pts = x*self.ny*self.nz + y*self.nz + z
                 Mx[cnt,pts] = 1.0
                 cnt += 1
     
     '''populate the locations in the Y grid'''
     My = sparse.dok_matrix((nR,self.nx*(self.ny+1)*self.nz))
     cnt = 0
     for x in range(xrng[0],xrng[1]):
         for y in range(yrng[0]+1,yrng[1]+1):
             for z in range(zrng[0],zrng[1]):
                 pts = x*(self.ny+1)*self.nz + y*self.nz + z
                 My[cnt,pts] = 1.0
                 cnt += 1
     
     
     '''populate the locations in the Z grid'''
     Mz = sparse.dok_matrix((nR,self.nx*self.ny*(self.nz+1)))
     cnt = 0
     for x in range(xrng[0],xrng[1]):
         for y in range(yrng[0],yrng[1]):
             for z in range(zrng[0]+1,zrng[1]+1):
                 pts = x*(self.ny)*(self.nz+1) + y*(self.nz+1) + z
                 Mz[cnt,pts] = 1.0
                 cnt += 1
     
     ''' put them all together in a block matrix '''    
     self.Md = spt.vCat([Mx.T,My.T,Mz.T]).T
     print('Md shape ' + repr(self.Md.shape))
     
     self.x2u = sparse.block_diag((Mx,My,Mz), 'csc').T
     print('x2u shape ' + repr(self.x2u.shape))
def depths(mask, normals):
    """Reconstructs the depths from normals.
    
    Args:
        mask: width x height boolean array marking the valid pixels
        normals: width x height x 3 array
    """
    width, height, three = normals.shape
    assert three == 3
    m = dok_matrix((width*height*2, width*height), dtype=float)
    b = np.zeros(width*height*2, dtype=float)
    log.debug('maximal shape: %s', m.shape)
    row = 0
    coords = ConsistentBimap()
    for x in range(width):
        for y in range(height):
            if not mask[x,y]: continue
            elif not (mask[x+1,y] and mask[x,y+1] and mask[x-1,y] and mask[x,y-1]):
                continue
            else:
                # n_z (z(x+1, y) - z(x, y)) = -n_x
                m[row, coords[(x+1,y)]] = 1
                m[row, coords[(x,y)]] = -1
                b[row] = normals[x,y,X]/normals[x,y,Z]
                row += 1

                # n_z (z(x, y+1) - z(x, y)) = -n_y
                m[row, coords[(x,y+1)]] = 1
                m[row, coords[(x,y)]] = -1
                b[row] = normals[x,y,Y]/normals[x,y,Z]
                row += 1

    # Now we know how many pixels are used and we restrict the matrix to the
    # rows needed.
    m_p = dok_matrix((row+1, coords.i), dtype=float)

    for (x,y), v in m.items():
        try:
            m_p[x,y] = v
        except Exception as e:
            log.error('error at (%s, %s)', x, y)
            raise
    # normalization
    m_p[row,0] = 1
    m_p = m_p.tocsr()
    b = b[:row+1]
    log.debug('actual shape: %s', m_p.shape)
    s = lsqr(m_p, b, atol=1e-3, btol=1e-6, show=True)
    z_p = s[0]
    z_p = normalize(z_p)
    z = np.zeros((width, height))
    for row,(x,y) in coords.r.items():
        z[x,y] = z_p[row]
    log.debug('z(0,0) = %s', z[0,0])
    return z
Example #30
def build_R_matrices(G, B, branch_map):
    """ rows are buses 2 to n; cols are branches  """
    n = G.shape[0]
    Rreal = dok_matrix((n-1, n-1))
    Rreac = dok_matrix((n-1, n-1))
    for fbus in range(1, n):
        for tbus in B[fbus, :].nonzero()[1]:
            branch = branch_map[(fbus, tbus)]
            Rreal[fbus-1, branch] = -G[fbus, tbus]
            Rreac[fbus-1, branch] = B[fbus, tbus]
    return Rreal, Rreac
Example #31
    def find_translation(cls,
                         resolver,
                         src_type,
                         dst_type,
                         *,
                         exact=False) -> Optional["MultiStepTranslator"]:
        if isinstance(dst_type,
                      type) and not issubclass(dst_type, ConcreteType):
            dst_type = resolver.class_to_concrete.get(dst_type, dst_type)

        if not isinstance(dst_type, type):
            dst_type = dst_type.__class__

        if exact:
            trns = resolver.translators.get((src_type, dst_type), None)
            if trns is None:
                return
            mst = MultiStepTranslator(src_type)
            mst.add_after(trns, dst_type)
            return mst

        abstract = dst_type.abstract
        if abstract not in resolver.translation_matrices:
            # Build translation matrix
            concrete_list = []
            concrete_lookup = {}
            included_abstract_types = set()
            for ct in resolver.concrete_types:
                if (abstract is ct.abstract
                        or abstract in ct.abstract.unambiguous_subcomponents):
                    concrete_lookup[ct] = len(concrete_list)
                    concrete_list.append(ct)
                    included_abstract_types.add(ct.abstract)
            m = ss.dok_matrix((len(concrete_list), len(concrete_list)),
                              dtype=bool)
            for s, d in resolver.translators:
                # only accept destinations of included abstract types
                if d.abstract in included_abstract_types:
                    sidx = concrete_lookup[s]
                    didx = concrete_lookup[d]
                    m[sidx, didx] = True
            sssp, predecessors = ss.csgraph.dijkstra(m.tocsr(),
                                                     return_predecessors=True,
                                                     unweighted=True)
            resolver.translation_matrices[abstract] = (
                concrete_list,
                concrete_lookup,
                sssp,
                predecessors,
            )

        # Lookup shortest path from stored results
        packed_data = resolver.translation_matrices[abstract]
        concrete_list, concrete_lookup, sssp, predecessors = packed_data
        try:
            sidx = concrete_lookup[src_type]
            didx = concrete_lookup[dst_type]
        except KeyError:
            return None
        if sssp[sidx, didx] == np.inf:
            return None
        # Path exists; use predecessor matrix to build up required transformations
        mst = MultiStepTranslator(src_type)
        while sidx != didx:
            parent_idx = predecessors[sidx, didx]
            next_translator = resolver.translators[(concrete_list[parent_idx],
                                                    concrete_list[didx])]
            next_dst_type = concrete_list[didx]
            mst.add_before(next_translator, next_dst_type)
            didx = parent_idx

        return mst
Example #32
def RP_AddExon(peaks_info, genes_info_full, genes_info_tss, decay):
    """Multiple processing function to calculate regulation potential."""

    Sg = lambda x: 2**(-x)
    checkInclude = lambda x, y: all([x >= y[0], x <= y[1]])
    gene_distance = 15 * decay
    genes_peaks_score_array = sp_sparse.dok_matrix(
        (len(genes_info_full), len(peaks_info)), dtype=np.float64)

    w = genes_info_tss + peaks_info
    A = {}

    w.sort()
    for elem in w:
        if elem[-3] == 1:
            A[elem[-1]] = elem
        else:
            dlist = []
            for gene_name in list(A.keys()):
                g = A[gene_name]
                tmp_distance = elem[1] - g[1]
                if all([
                        g[0] == elem[0],
                        any(
                            list(
                                map(checkInclude, [elem[1]] * len(g[5]),
                                    list(g[5]))))
                ]):
                    genes_peaks_score_array[gene_name, elem[-1]] = 1.0 / g[-4]
                elif all([g[0] == elem[0], tmp_distance <= gene_distance]):
                    genes_peaks_score_array[gene_name, elem[-1]] = Sg(
                        tmp_distance / decay)
                else:
                    dlist.append(gene_name)
            for gene_name in dlist:
                del A[gene_name]

    w.reverse()
    for elem in w:
        if elem[-3] == 1:
            A[elem[-1]] = elem
        else:
            dlist = []
            for gene_name in list(A.keys()):
                g = A[gene_name]
                tmp_distance = g[1] - elem[1]
                if all([
                        g[0] == elem[0],
                        any(
                            list(
                                map(checkInclude, [elem[1]] * len(g[5]),
                                    list(g[5]))))
                ]):
                    genes_peaks_score_array[gene_name, elem[-1]] = 1.0 / g[-4]
                if all([g[0] == elem[0], tmp_distance <= gene_distance]):
                    genes_peaks_score_array[gene_name, elem[-1]] = Sg(
                        tmp_distance / decay)
                else:
                    dlist.append(gene_name)
            for gene_name in dlist:
                del A[gene_name]

    return (genes_peaks_score_array)
Example #33
def topoStatistics(W, nWrook, regular=False):
    frontiers = list(set(W.keys()) - set(nWrook.keys()))
    nw = []
    areas_nngs = {}
    wSparse = dok_matrix((len(W), len(W)))
    n1 = 0
    for w in W:
        n1 += len(W[w])
        for j in W[w]:
            wSparse[w, j] = 1
    try:
        eig = max(linalg.eigsh(wSparse, 2)[0])
    except:
        eig = -9999

    for w in nWrook:
        nw += [len(W[w])]
        if nw[-1] != 0:
            if nw[-1] in areas_nngs:
                areas_nngs[nw[-1]].append(w)
            else:
                areas_nngs[nw[-1]] = [w]
    # Calculating second moment of P(n)
    mu2 = 0
    mu1 = numpy.mean(nw)
    mu2 = numpy.var(nw)
    m = {}
    p = {}
    for n in areas_nngs:
        # mean number of neighbors of areas which are neighbors of an area with n neighbors
        mean = 0
        # number of areas which are neighbors of areas with n neighbors
        nareas_n = 0
        for a in areas_nngs[n]:
            if a not in frontiers:
                neighs = W[a]
                for a1 in neighs:
                    mean += len(W[a1])
                    nareas_n += 1
        mean = mean / float(nareas_n)
        m[n] = mean
        p[n] = len(areas_nngs[n]) / float(len(nWrook))
    X1 = []
    X2 = []
    Y = []
    Y2 = []
    for n in m:
        for k in areas_nngs[n]:
            X1.append(1)
            X2.append(n)
            Y.append(n * m[n])
            Y2.append((n**2) * m[n])
    X = numpy.matrix(list(zip(X1, X2)))
    Y = numpy.matrix(Y)
    sparseness = n1 / float(len(W)**2 - len(W))
    if regular:
        a1 = 0
        a2 = 0
        a3 = 0
        mu2 = 0
    else:
        B = (X.transpose() * X)**(-1) * X.transpose() * Y.transpose()
        a1 = (mu1 * (mu2 + numpy.mean(Y)) - numpy.mean(Y2)) / float(mu2)
        a2 = (B[0] - mu2) / mu1  # THIS ONE DOES NOT COME OUT RIGHT
        a3 = -1 * (B[1] - mu1)
    return max(nw), min(nw), numpy.mean(nw), mu2, a1, sparseness, eig
train_bow_features = bag_of_words_vectorizer.transform(train_corpus)

test_tokens = testset['tokens'].tolist()
test_corpus = createCorpus(test_tokens)
test_bow_features = bag_of_words_vectorizer.transform(test_corpus)

# Obtain LDA features

def createCorpusLDA(tokens):
    dictionary = corpora.Dictionary(tokens)
    corpus = [dictionary.doc2bow(token_list) for token_list in tokens]
    return (dictionary,corpus)

train_lda_dictionary, train_lda_corpus = createCorpusLDA(train_tokens)

train_lda_features = dok_matrix((len(train_lda_corpus),100))

for i in tqdm(range(len(train_lda_corpus))):
    topic_distribution = lda_model[train_lda_corpus[i]]
    for (topic_nr,prob) in topic_distribution:
        train_lda_features[i, topic_nr] = prob

train_lda_features_csr = train_lda_features.tocsr()

test_lda_dictionary, test_lda_corpus = createCorpusLDA(test_tokens)

test_lda_features = dok_matrix((len(test_lda_corpus),100))

for i in tqdm(range(len(test_lda_corpus))):
    topic_distribution = lda_model[test_lda_corpus[i]]
    for (topic_nr,prob) in topic_distribution:
        test_lda_features[i, topic_nr] = prob

test_lda_features_csr = test_lda_features.tocsr()
Example #35
    def load_pre_splitter_data(self):
        pos_per_user = {}
        num_items, num_users = 0, 0
        userids, itemids, idusers, iditems = {}, {}, {}, {}
        # Get number of users and items
        with open(self.path + ".train.rating", 'r') as f:
            for line in f.readlines():
                useridx, itemidx, rating, time = line.strip().split(
                    self.separator)
                if float(rating) >= self.threshold:
                    if itemidx not in itemids:
                        iditems[num_items] = itemidx
                        itemids[itemidx] = num_items
                        num_items += 1

                    if useridx not in userids:
                        idusers[num_users] = useridx
                        userids[useridx] = num_users
                        num_users += 1
                        pos_per_user[userids[useridx]] = []
                    pos_per_user[userids[useridx]].append(
                        [itemids[itemidx], 1, int(time)])
                else:
                    if itemidx not in itemids:
                        iditems[num_items] = itemidx
                        itemids[itemidx] = num_items
                        num_items += 1

                    if useridx not in userids:
                        idusers[num_users] = useridx
                        userids[useridx] = num_users
                        num_users += 1
                        pos_per_user[userids[useridx]] = []
                    pos_per_user[userids[useridx]].append(
                        (itemids[itemidx], rating, int(time)))

            train_dict = {}
            for u in range(num_users):
                pos_per_user[u] = sorted(pos_per_user[u], key=lambda d: d[2])
                items = []
                for enlement in pos_per_user[u]:
                    items.append(enlement[0])
                train_dict[u] = items

        with open(self.path + ".test.rating", 'r') as f:
            for line in f.readlines():
                useridx, itemidx, rating, time = line.strip().split(
                    self.separator)
                if float(rating) >= self.threshold:
                    if itemidx not in itemids:
                        iditems[num_items] = itemidx
                        itemids[itemidx] = num_items
                        num_items += 1

                    if useridx not in userids:
                        idusers[num_users] = useridx
                        userids[useridx] = num_users
                        num_users += 1
                        pos_per_user[userids[useridx]] = []
                    pos_per_user[userids[useridx]].append(
                        [itemids[itemidx], 1, int(time)])
                else:
                    if itemidx not in itemids:
                        iditems[num_items] = itemidx
                        itemids[itemidx] = num_items
                        num_items += 1

                    if useridx not in userids:
                        idusers[num_users] = useridx
                        userids[useridx] = num_users
                        num_users += 1
                        pos_per_user[userids[useridx]] = []
                    pos_per_user[userids[useridx]].append(
                        [itemids[itemidx], rating,
                         int(time)])
        for u in range(num_users):
            pos_per_user[u] = sorted(pos_per_user[u], key=lambda d: d[2])

        train_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
        time_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
        with open(self.path + ".train.rating", "r") as f:
            line = f.readline()
            while line and line != "":
                arr = line.split("\t")
                user, item, rating, time = userids[arr[0]], itemids[
                    arr[1]], float(arr[2]), float(arr[3])
                if float(rating) >= self.threshold:
                    train_matrix[user, item] = 1

                else:
                    train_matrix[user, item] = rating
                time_matrix[user, item] = time
                line = f.readline()
        print("already loaded the trainMatrix...")

        test_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
        with open(self.path + ".test.rating", "r") as f:
            line = f.readline()
            while line and line != "":
                arr = line.split("\t")
                user, item, rating, time = userids[arr[0]], itemids[
                    arr[1]], float(arr[2]), float(arr[3])
                if float(rating) >= self.threshold:
                    test_matrix[user, item] = 1
                else:
                    test_matrix[user, item] = rating
                time_matrix[user, item] = time
                line = f.readline()
        print("already loaded the testMatrix...")

        return train_matrix, train_dict, test_matrix, pos_per_user, userids, itemids, time_matrix
Example #36
        counts[wordId,userId]=count*log(float(U)/(1+idfs[wordId]))
    file.close()


# In[28]:

def proc(k):	
    (u, s, v) = svds(counts, k=k)
    return (u, s, v)


# In[29]:

vocab=[None]*V
reverseVocab={}
counts=sparse.dok_matrix((V,U))


# In[30]:

readWords("unigrams.txt")


# In[31]:

readIdfs("idf.txt")


# In[32]:

print "reading data"
Example #37
 def __init__(self, treedata: tuple):
     self.result = []
     self.count = GetMax(treedata)
     self.dm = dok_matrix((self.count, self.count), dtype=bool)
     for e in treedata:
         self.dm[e[0], e[1]] = True
Example #38
    def __init__(self, path, batch_size):
        self.path = path
        self.batch_size = batch_size

        train_file = path + '/train.txt'
        test_file = path + '/test.txt'

        self.n_users, self.n_items = 0, 0
        self.n_train, self.n_test = 0, 0
        self.neg_pools = {}

        self.exist_users = []

        with open(train_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    self.exist_users.append(uid)
                    self.n_items = max(self.n_items, max(items))
                    self.n_users = max(self.n_users, uid)
                    self.n_train += len(items)

        with open(test_file) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n')
                    try:
                        items = [int(i) for i in l.split(' ')[1:]]
                    except Exception:
                        continue
                    self.n_items = max(self.n_items, max(items))
                    self.n_test += len(items)
        self.n_items += 1
        self.n_users += 1
        self.print_statistics()
        self.R = sp.dok_matrix((self.n_users, self.n_items), dtype=np.float32)
        self.train_items, self.test_set = {}, {}
        with open(train_file) as f_train:
            with open(test_file) as f_test:
                for l in f_train.readlines():
                    if len(l) == 0: break
                    l = l.strip('\n')
                    items = [int(i) for i in l.split(' ')]
                    uid, train_items = items[0], items[1:]

                    for i in train_items:
                        self.R[uid, i] = 1.

                    self.train_items[uid] = train_items

                for l in f_test.readlines():
                    if len(l) == 0: break
                    l = l.strip('\n')
                    try:
                        items = [int(i) for i in l.split(' ')]
                    except Exception:
                        continue

                    uid, test_items = items[0], items[1:]
                    self.test_set[uid] = test_items
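A note on the assumed file layout (inferred from the parsing above, not from external docs): each line of train.txt and test.txt is a user id followed by the ids of that user's items, all space-separated, e.g. "0 12 7 431".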
Example #39
    def load_data_by_user_time(self):
        logger.info("Loading interaction records from %s " % (self.path))
        pos_per_user = {}
        num_ratings = 0
        num_items = 0
        num_users = 0
        #user/item {raw id, inner id} map
        userids = {}
        itemids = {}
        # inverse views of userIds, itemIds,
        idusers = {}
        iditems = {}
        with open(self.path, 'r') as f:
            for line in f.readlines():
                if self.data_format == "UIRT":
                    useridx, itemidx, rating, time = line.strip().split(
                        self.separator)
                    if float(rating) < self.threshold:
                        continue
                elif self.data_format == "UIT":
                    useridx, itemidx, time = line.strip().split(self.separator)
                    rating = 1
                elif self.data_format == "UIR":
                    useridx, itemidx, rating = line.strip().split(
                        self.separator)
                    if float(rating) < self.threshold:
                        continue
                elif self.data_format == "UI":
                    useridx, itemidx = line.strip().split(self.separator)
                    rating = 1

                else:
                    print("please choose a correct data format. ")

                num_ratings += 1
                if itemidx not in itemids:
                    iditems[num_items] = itemidx
                    itemids[itemidx] = num_items
                    num_items += 1

                if useridx not in userids:
                    idusers[num_users] = useridx
                    userids[useridx] = num_users
                    num_users += 1
                    pos_per_user[userids[useridx]] = []
                if self.data_format == "UIRT" or self.data_format == "UIT":
                    pos_per_user[userids[useridx]].append(
                        (itemids[itemidx], rating, int(float(time))))

                else:
                    pos_per_user[userids[useridx]].append(
                        (itemids[itemidx], rating, 1))

        if self.data_format == "UIRT" or self.data_format == "UIT":
            for u in range(num_users):
                pos_per_user[u] = sorted(pos_per_user[u], key=lambda d: d[2])
        logger.info("\"num_users\": %d,\"num_items\":%d, \"num_ratings\":%d" %
                    (num_users, num_items, num_ratings))
        userseq = deepcopy(pos_per_user)
        train_dict = {}
        train_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
        test_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
        time_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
        for u in range(num_users):
            num_ratings_by_user = len(pos_per_user[u])
            num_test_ratings = math.floor(
                float(self.splitterRatio[1]) * num_ratings_by_user)
            if len(pos_per_user[u]) >= 2 and num_test_ratings >= 1:
                for _ in range(num_test_ratings):
                    test_item = pos_per_user[u][-1]
                    pos_per_user[u].pop()
                    test_matrix[u, test_item[0]] = test_item[1]
                    time_matrix[u, test_item[0]] = test_item[2]
            items = []
            for element in pos_per_user[u]:
                items.append(element[0])
                train_matrix[u, element[0]] = element[1]
                time_matrix[u, element[0]] = element[2]
            train_dict[u] = items
        return train_matrix, train_dict, test_matrix, userseq, userids, itemids, time_matrix
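The branches above accept four line formats; a minimal sketch of what each looks like and how a UIRT line would be parsed (separator and values below are hypothetical, not taken from any dataset):

# "UIRT": user, item, rating, timestamp   e.g. "u1\ti5\t4.0\t1612000000"
# "UIT" : user, item, timestamp           e.g. "u1\ti5\t1612000000"
# "UIR" : user, item, rating              e.g. "u1\ti5\t4.0"
# "UI"  : user, item                      e.g. "u1\ti5"
line = "u1\ti5\t4.0\t1612000000"
useridx, itemidx, rating, timestamp = line.strip().split("\t")
print(useridx, itemidx, float(rating), int(float(timestamp)))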
Beispiel #40
0
    def solve_fourier(self, kappa, **argv):

        if np.isscalar(kappa):
            kappa = np.diag(np.diag(kappa * np.eye(self.dim)))

        if kappa.ndim == 2:
            kappa = np.repeat(np.array([np.diag(np.diag(kappa))]),
                              self.n_elems,
                              axis=0)

        F = sp.dok_matrix((self.n_elems, self.n_elems))
        B = np.zeros(self.n_elems)
        for ll in self.mesh['active_sides']:

            area = self.mesh['areas'][ll]
            (i, j) = self.mesh['side_elem_map_vec'][ll]
            vi = self.mesh['volumes'][i]
            vj = self.mesh['volumes'][j]
            kappa_loc = self.get_kappa(i, j, ll, kappa)
            #kappa_loc = np.eye(2)*kappa_loc[0,0]

            if not i == j:

                (v_orth, dummy) = self.get_decomposed_directions(ll,
                                                                 rot=kappa_loc)
                F[i, i] += v_orth / vi * area
                F[i, j] -= v_orth / vi * area
                F[j, j] += v_orth / vj * area
                F[j, i] -= v_orth / vj * area
                if ll in self.mesh['periodic_sides']:
                    kk = list(self.mesh['periodic_sides']).index(ll)
                    B[i] += self.mesh['periodic_side_values'][
                        kk] * v_orth / vi * area
                    B[j] -= self.mesh['periodic_side_values'][
                        kk] * v_orth / vj * area

        ## rescale and fix one point to 0
        #scale = 1/F.max(axis=0).toarray()[0]
        #n = np.random.randint(self.n_elems)
        #scale[n] = 0
        #F.data = F.data * scale[F.indices]
        #F[n,n] = 1
        #B[n] = 0
        SU = splu(F.tocsc())
        #-----------------------

        C = np.zeros(self.n_elems)
        n_iter = 0
        kappa_old = 0
        error = 1
        grad = np.zeros((self.n_elems, self.dim))
        while error > self.max_fourier_error and \
                       n_iter < self.max_fourier_iter :
            RHS = B + C
            #for n in range(self.n_elems):
            #  RHS[n] = RHS[n]*scale[n]

            temp = SU.solve(RHS)
            temp = temp - (max(temp) + min(temp)) / 2.0
            kappa_eff = self.compute_diffusive_thermal_conductivity(
                temp, grad, kappa)
            error = abs((kappa_eff - kappa_old) / kappa_eff)
            kappa_old = kappa_eff
            n_iter += 1

            C, grad = self.compute_secondary_flux(temp, kappa)

        flux = -np.einsum('cij,cj->ci', kappa, grad)

        meta = [kappa_eff, error, n_iter]
        return {
            'flux_fourier': flux,
            'temperature_fourier': temp,
            'meta': np.array(meta),
            'grad': grad
        }
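The loop above factorizes F once with splu and then reuses the factorization for every new right-hand side B + C. A minimal sketch of that pattern on a made-up tridiagonal system:

import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import splu

# Toy 1D Laplacian assembled in DOK form, factorized once, then solved
# for two different right-hand sides, mirroring the reuse of SU above.
n = 5
F_demo = sp.dok_matrix((n, n))
for i in range(n):
    F_demo[i, i] = 2.0
    if i > 0:
        F_demo[i, i - 1] = -1.0
    if i < n - 1:
        F_demo[i, i + 1] = -1.0
SU_demo = splu(F_demo.tocsc())
print(SU_demo.solve(np.ones(n)))
print(SU_demo.solve(np.arange(n, dtype=float)))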
Beispiel #41
0
    def solve(self, objective, constraints, cached_data, warm_start, verbose,
              solver_opts):
        """Returns the result of the call to the solver.

        Parameters
        ----------
        objective : LinOp
            The canonicalized objective.
        constraints : list
            The list of canonicalized constraints.
        cached_data : dict
            A map of solver name to cached problem data.
        warm_start : bool
            Not used.
        verbose : bool
            Should the solver print output?
        solver_opts : dict
            Additional arguments for the solver.

        Returns
        -------
        tuple
            (status, optimal value, primal, equality dual, inequality dual)
        """
        import gurobipy

        # Get problem data
        data = self.get_problem_data(objective, constraints, cached_data)

        c = data[s.C]
        b = data[s.B]
        A = dok_matrix(data[s.A])
        # Save the dok_matrix.
        data[s.A] = A

        n = c.shape[0]

        solver_cache = cached_data[self.name()]

        # TODO warmstart with SOC constraints.
        if warm_start and solver_cache.prev_result is not None \
           and len(data[s.DIMS][s.SOC_DIM]) == 0:
            model = solver_cache.prev_result["model"]
            variables = solver_cache.prev_result["variables"]
            gur_constrs = solver_cache.prev_result["gur_constrs"]
            c_prev = solver_cache.prev_result["c"]
            A_prev = solver_cache.prev_result["A"]
            b_prev = solver_cache.prev_result["b"]

            # If there is a parameter in the objective, it may have changed.
            if len(lu.get_expr_params(objective)) > 0:
                c_diff = c - c_prev

                I_unique = list(set(np.where(c_diff)[0]))

                for i in I_unique:
                    variables[i].Obj = c[i]
            else:
                # Stay consistent with Gurobi's representation of the problem
                c = c_prev

            # Get equality and inequality constraints.
            sym_data = self.get_sym_data(objective, constraints, cached_data)
            all_constrs, _, _ = self.split_constr(sym_data.constr_map)

            # If there is a parameter in the constraints,
            # A or b may have changed.
            if self._param_in_constr(all_constrs):
                A_diff = dok_matrix(A - A_prev)
                b_diff = b - b_prev

                # Figure out which rows of A and elements of b have changed
                try:
                    I, _ = zip(*[x for x in A_diff.keys()])
                except ValueError:
                    I = []
                I_unique = list(set(I) | set(np.where(b_diff)[0]))

                nonzero_locs = gurobipy.tuplelist([x for x in A.keys()])

                # Update locations which have changed
                for i in I_unique:

                    # Remove old constraint if it exists
                    if gur_constrs[i] is not None:
                        model.remove(gur_constrs[i])
                        gur_constrs[i] = None

                    # Add new constraint
                    if len(nonzero_locs.select(i, "*")) > 0:
                        expr_list = []
                        for loc in nonzero_locs.select(i, "*"):
                            expr_list.append((A[loc], variables[loc[1]]))
                        expr = gurobipy.LinExpr(expr_list)
                        if i < data[s.DIMS][s.EQ_DIM]:
                            ctype = gurobipy.GRB.EQUAL
                        elif data[s.DIMS][s.EQ_DIM] <= i \
                                < data[s.DIMS][s.EQ_DIM] + data[s.DIMS][s.LEQ_DIM]:
                            ctype = gurobipy.GRB.LESS_EQUAL
                        gur_constrs[i] = model.addConstr(expr, ctype, b[i])

                model.update()
            else:
                # Stay consistent with Gurobi's representation of the problem
                A = A_prev
                b = b_prev

        else:
            model = gurobipy.Model()
            variables = []
            for i in range(n):
                # Set variable type.
                if i in data[s.BOOL_IDX]:
                    vtype = gurobipy.GRB.BINARY
                elif i in data[s.INT_IDX]:
                    vtype = gurobipy.GRB.INTEGER
                else:
                    vtype = gurobipy.GRB.CONTINUOUS
                variables.append(
                    model.addVar(
                        obj=c[i],
                        name="x_%d" % i,
                        vtype=vtype,
                        # Gurobi's default LB is 0 (WHY???)
                        lb=-gurobipy.GRB.INFINITY,
                        ub=gurobipy.GRB.INFINITY))
            model.update()

            nonzero_locs = gurobipy.tuplelist([x for x in A.keys()])
            eq_constrs = self.add_model_lin_constr(
                model, variables, range(data[s.DIMS][s.EQ_DIM]),
                gurobipy.GRB.EQUAL, nonzero_locs, A, b)
            leq_start = data[s.DIMS][s.EQ_DIM]
            leq_end = data[s.DIMS][s.EQ_DIM] + data[s.DIMS][s.LEQ_DIM]
            ineq_constrs = self.add_model_lin_constr(model, variables,
                                                     range(leq_start, leq_end),
                                                     gurobipy.GRB.LESS_EQUAL,
                                                     nonzero_locs, A, b)
            soc_start = leq_end
            soc_constrs = []
            new_leq_constrs = []
            for constr_len in data[s.DIMS][s.SOC_DIM]:
                soc_end = soc_start + constr_len
                soc_constr, new_leq, new_vars = self.add_model_soc_constr(
                    model, variables, range(soc_start, soc_end), nonzero_locs,
                    A, b)
                soc_constrs.append(soc_constr)
                new_leq_constrs += new_leq
                variables += new_vars
                soc_start += constr_len

            gur_constrs = eq_constrs + ineq_constrs + \
                soc_constrs + new_leq_constrs
            model.update()

        # Set verbosity and other parameters
        model.setParam("OutputFlag", verbose)
        # TODO user option to not compute duals.
        model.setParam("QCPDual", True)

        for key, value in solver_opts.items():
            model.setParam(key, value)

        results_dict = {}
        try:
            model.optimize()
            results_dict["primal objective"] = model.ObjVal
            results_dict["x"] = np.array([v.X for v in variables])

            # Only add duals if not a MIP.
            # Not sure why we need to negate the following,
            # but need to in order to be consistent with other solvers.
            if not self.is_mip(data):
                vals = []
                for lc in gur_constrs:
                    if lc is not None:
                        if isinstance(lc, gurobipy.QConstr):
                            vals.append(lc.QCPi)
                        else:
                            vals.append(lc.Pi)
                    else:
                        vals.append(0)
                results_dict["y"] = -np.array(vals)
        except:
            pass

        results_dict["model"] = model
        results_dict["variables"] = variables
        results_dict["gur_constrs"] = gur_constrs
        results_dict["status"] = self.STATUS_MAP.get(model.Status,
                                                     s.SOLVER_ERROR)

        return self.format_results(results_dict, data, cached_data)
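The warm-start branch above relies on subtracting two dok_matrix objects: the keys of the sparse difference are exactly the coefficients that changed, which identifies the constraint rows to rebuild. A small sketch with made-up matrices:

from scipy.sparse import dok_matrix

A_prev = dok_matrix((3, 3))
A_prev[0, 0], A_prev[1, 2] = 1.0, 2.0
A_new = dok_matrix((3, 3))
A_new[0, 0], A_new[1, 2] = 1.0, 5.0          # only entry (1, 2) differs

A_diff = dok_matrix(A_new - A_prev)
changed_rows = sorted({i for (i, _) in A_diff.keys()})
print(changed_rows)                           # rows whose constraints would be rebuilt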
Beispiel #42
0
import numpy as np
from scipy.sparse import dok_matrix

S = dok_matrix((10000, 10000), dtype=np.float32)
S[0:10, 0:10] = 0.5
print(type(S))
Beispiel #43
0
    earth_height = earth_map.height
    my_width = earth_width
    my_height = earth_height

    for x in range(-1, earth_width+1):
        for y in range(-1, earth_height+1):
            coords = (x, y)
            if x==-1 or y==-1 or x == earth_map.width or y== earth_map.height:
                passable_locations_earth[coords]= False
            elif earth_map.is_passable_terrain_at(bc.MapLocation(earth, x, y)):
                passable_locations_earth[coords] = True
            else:
                passable_locations_earth[coords]= False

    number_of_cells = earth_width * earth_height
    S = dok_matrix((number_of_cells, number_of_cells), dtype=int)
    for x in range(earth_width):
        for y in range(earth_height):
            curr = (x, y)
            if passable_locations_earth[curr]:
                val = y*earth_width + x
                for coord in explore.coord_neighbors(curr):
                    if passable_locations_earth[coord]:
                        val2 = coord[1]*earth_width + coord[0]
                        S[val, val2] = 1
                        S[val2, val] = 1

    bfs_array = csgraph.shortest_path(S, method = 'D', unweighted = True)
    #bfs_dict = {} # stores the distances found by BFS so far
    #precomputed_bfs = explore.precompute_earth(passable_locations_earth, coord_to_direction, wavepoints)
    #start_time = time.time()
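A scaled-down sketch of the adjacency construction and shortest-path call above, for a hypothetical 2x2 grid in which every cell is passable:

from scipy.sparse import dok_matrix
from scipy.sparse import csgraph

width = height = 2
S_demo = dok_matrix((width * height, width * height), dtype=int)
for x in range(width):
    for y in range(height):
        for dx, dy in ((1, 0), (0, 1)):          # right and down neighbours
            nx_, ny_ = x + dx, y + dy
            if nx_ < width and ny_ < height:
                a, b = y * width + x, ny_ * width + nx_
                S_demo[a, b] = 1
                S_demo[b, a] = 1
dist = csgraph.shortest_path(S_demo, method='D', unweighted=True)
print(dist)                                       # hop counts between all cells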
Beispiel #44
0
    def construct_matrix(self):
        train_path = self.path + '/train.txt'
        test_path = self.path + '/test.txt'
        user_list_file = pd.read_csv(self.path + '/user_list.txt', sep=' ')
        item_list_file = pd.read_csv(self.path + '/item_list.txt', sep=' ')

        self.n_users, self.n_items = 0, 0
        self.n_train, self.n_test = 0, 0
        self.neg_pools = {}

        self.exist_users = []

        with open(train_path) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n').split(' ')
                    items = [int(i) for i in l[1:]]
                    uid = int(l[0])
                    self.exist_users.append(uid)
                    self.n_items = max(self.n_items, max(items))
                    self.n_users = max(self.n_users, uid)
                    self.n_train += len(items)

        with open(test_path) as f:
            for l in f.readlines():
                if len(l) > 0:
                    l = l.strip('\n')
                    try:
                        items = [int(i) for i in l.split(' ')[1:]]
                    except Exception:
                        continue
                    self.n_items = max(self.n_items, max(items))
                    self.n_test += len(items)
        self.n_items += 1
        self.n_users += 1

        start_ts = time()
        users_set = set()
        items_set = set()
        self.R = sp.dok_matrix((self.n_users, self.n_items), dtype=np.float32)

        # max_train_user_item = 0
        # with open(train_path, 'r') as f_train:
        #     for l in f_train.readlines():
        #         if len(l) == 0:
        #             break
        #         items = [int(i) for i in l.strip().split(' ')]
        #         user, train_items = items[0], items[1:]
        #         max_train_user_item = max(
        #             max_train_user_item, len(train_items))

        #         users_set.add(user)
        #         for item in train_items:
        #             u2i_matrix[user, item] = 1.
        #             items_set.add(item)

        # self.max_train_user_item = max_train_user_item

        # self.u2i_matrix = u2i_matrix.tocsr()
        # self.i2u_matrix = u2i_matrix.transpose().tocsr()
        # u2u_matrix = self.u2i_matrix * self.i2u_matrix
        # i2i_matrix = self.i2u_matrix * self.u2i_matrix

        # self.users_arr = np.sort(np.asarray(list(users_set)))
        # self.items_arr = np.sort(np.asarray(list(items_set)))
        # self.u2i_adj = self.construct_adj(self.u2i_matrix, self.n_users)
        # self.i2u_adj = self.construct_adj(self.i2u_matrix, self.n_items)
        # self.u2u_adj = self.construct_adj(u2u_matrix, self.n_users)
        # self.i2i_adj = self.construct_adj(i2i_matrix, self.n_items)

        # # self.test_u2i_dict = dict()
        # test_u2i_matrix = sp.dok_matrix(
        #     (self.n_users, self.n_items), dtype=np.float32)
        # max_test_user_item = 0
        # with open(test_path, 'r') as f_test:
        #     for l in f_test.readlines():
        #         if len(l) == 0:
        #             break
        #         items = [int(i) for i in l.strip().split(' ')]
        #         user, test_items = items[0], items[1:]
        #         # self.test_u2i_dict[user] = test_items
        #         for item in test_items:
        #             test_u2i_matrix[user, item] = 1.
        #         max_test_user_item = max(max_test_user_item, len(test_items))
        # self.max_test_user_item = max_test_user_item
        # self.test_u2i_adj = self.construct_adj(
        #     test_u2i_matrix.tocsr(), self.n_users)

        self.train_items, self.test_set = {}, {}
        with open(train_path) as f_train:
            with open(test_path) as f_test:
                for l in f_train.readlines():
                    if len(l) == 0: break
                    l = l.strip('\n')
                    items = [int(i) for i in l.split(' ')]
                    uid, train_items = items[0], items[1:]

                    for i in train_items:
                        self.R[uid, i] = 1.
                        # self.R[uid][i] = 1
                        items_set.add(i)

                    self.train_items[uid] = train_items

                for l in f_test.readlines():
                    if len(l) == 0: break
                    l = l.strip('\n')
                    try:
                        items = [int(i) for i in l.split(' ')]
                    except Exception:
                        continue

                    uid, test_items = items[0], items[1:]
                    self.test_set[uid] = test_items

        self.items_arr = np.sort(np.asarray(list(items_set)))
        self.u2i_matrix = self.R.tocsr()
        self.i2u_matrix = self.R.transpose().tocsr()
        u2u_matrix = self.u2i_matrix * self.i2u_matrix
        i2i_matrix = self.i2u_matrix * self.u2i_matrix
        self.u2i_adj = self.construct_adj(self.u2i_matrix, self.n_users)
        self.i2u_adj = self.construct_adj(self.i2u_matrix, self.n_items)
        self.u2u_adj = self.construct_adj(u2u_matrix, self.n_users)
        self.i2i_adj = self.construct_adj(i2i_matrix, self.n_items)

        # print('Start construct negative')
        # t1 = time()
        # self.negative_u2i_pool = [self.get_negative_pool(self.i2i_adj, user_items, self.items_arr) for user_items in self.u2i_adj]
        # t2 = time()
        # print('Construct negative_u2i_pool. Time: {:5.3f}'.format(t2 - t1))
        # self.negative_u2u_pool = [self.get_negative_pool(self.u2u_adj, user_users, self.users_arr) for user_users in self.u2u_adj]
        # t3 = time()
        # print('Construct negative_u2u_pool. Time: {:5.3f}'.format(t3 - t2))
        # self.negative_i2i_pool = [self.get_negative_pool(self.i2i_adj, item_items, self.items_arr) for item_items in self.i2i_adj]
        end_ts = time()
        print('Constructed adjacency matrices. Time: {:5.3f}'.format(end_ts -
                                                                     start_ts))
def convertCompatible(h,J):
    h=dict(zip(range(len(h)),h))
    J=sparse.dok_matrix(J)
    J=dict(zip(J.keys(),J.values()))
    return h,J
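A possible way to call the helper above (field and coupling values are made up): h becomes an index-to-value dict and J a {(i, j): value} dict containing only the nonzero couplings.

import numpy as np

h_vec = np.array([0.5, -0.5, 1.0])
J_mat = np.array([[0.0, 1.0, 0.0],
                  [0.0, 0.0, -1.0],
                  [0.0, 0.0, 0.0]])
h_dict, J_dict = convertCompatible(h_vec, J_mat)
print(h_dict)    # indices 0..2 mapped to the field values
print(J_dict)    # nonzero couplings keyed by (row, col)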
Beispiel #46
0
def main(param2val):  # param2val appears auto-magically via Ludwig
    cwc_param_name = param2val['cwc_param_name']
    window_size = param2val['window_size']
    window_weight = param2val['window_weight']
    window_type = param2val['window_type']
    vocab_name = param2val['vocab_name']
    article_coverage = param2val['article_coverage']
    # added by Ludwig
    project_path = Path(param2val['project_path'])
    save_path = Path(
        param2val['save_path'])  # all data that is saved must be saved here

    for k, v in param2val.items():
        print(k, v)

    # step 0
    print('Making vocab...')
    vocab_path = project_path / 'data' / '{}.txt'.format(vocab_name)
    if not vocab_path.exists():
        raise FileNotFoundError('{} not found on server'.format(vocab_path))
    vocab = SortedSet(vocab_path.read_text().split('\n'))
    vocab.discard('')  # not sure why empty string is in vocab - but it is
    assert len(vocab) > 0

    print('Loaded {} words from vocab'.format(len(vocab)))

    # step 1
    print('Tokenizing...', flush=True)
    param_path = project_path.parent / 'CreateWikiCorpus' / 'runs' / cwc_param_name
    bodies_path = get_text_file_path(param_path, 'bodies')
    titles_path = get_text_file_path(param_path, 'titles')
    num_docs = len(titles_path.read_text().split(
        '\n')) - 1  # "wc -l" says there is 1 less line
    print(f'Number of articles in text file={num_docs}')
    tokenized_docs = gen_tokenized_articles(bodies_path,
                                            num_docs)  # this also lower-cases

    # step 2
    print('Making co-occurrence matrix', flush=True)
    w2id = {w: n
            for n, w in enumerate(vocab)
            }  # python 3 integers have dynamic size
    id2w = {n: w for n, w in enumerate(vocab)}
    max_num_docs = int(num_docs * article_coverage)
    cooc_matrix = make_sparse_ww_matrix(
        tokenized_docs,
        w2id,
        max_num_docs=max_num_docs,
        window_size=window_size,
        window_type=window_type,
        window_weight=window_weight,
    )
    verbose = cooc_matrix.size < 1000
    ids2cf = sparse.dok_matrix(cooc_matrix).todok()
    ww2cf = {}
    print('Converting sparse matrix to dictionary...', flush=True)
    for ids, cf in ids2cf.items():
        i1, i2 = ids
        word1 = id2w[i1]
        word2 = id2w[i2]
        ww = (word1, word2)
        ww2cf[ww] = cf
    # check
    if verbose:
        print(w2id)
        print(cooc_matrix.toarray())
        print(cooc_matrix.shape)
        print(ww2cf)

    # step 3 - save the dictionary containing co-occurrence frequencies to Ludwig-supplied save_path
    print('Saving dictionary to disk...')
    ww2cf_path = save_path / 'ww2cf.pkl'
    if not ww2cf_path.parent.exists():
        ww2cf_path.parent.mkdir(parents=True)
    pickle.dump(ww2cf, ww2cf_path.open('wb'))

    print(
        "Emily is done making a wiki co-occurrence dictionary! Wait for the folders to finish moving!"
    )

    return []
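A minimal sketch of the dok-to-dictionary conversion performed in step 2 above, with a made-up 2x2 co-occurrence matrix and vocabulary:

from scipy import sparse

m_demo = sparse.dok_matrix((2, 2))
m_demo[0, 1] = 3
m_demo[1, 0] = 3
id2w_demo = {0: 'cat', 1: 'dog'}
ww2cf_demo = {(id2w_demo[i], id2w_demo[j]): cf for (i, j), cf in m_demo.items()}
print(ww2cf_demo)    # {('cat', 'dog'): 3.0, ('dog', 'cat'): 3.0}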
i2t = {}

for x in f.xreadlines():
    t_ids = map(lambda x: int(x), x.split(" "))
    for t_id in t_ids:
        if t_id in t2i:
            i = t2i[t_id]
        else:
            i = len(t2i) + 1
            t2i[t_id] = i
            i2t[i] = t_id

from scipy.sparse import dok_matrix
import numpy as np

n = len(t2i)
A = dok_matrix((n, n), dtype=np.int8)
print n

f = open("social_graph.txt")
for x in f.xreadlines():
    t_ids = map(lambda x: int(x), x.split(" "))
    source = t_ids[0]
    for t_id in t_ids[1:]:
        dest = t2i[t_id]
        print dest, source
        A[dest, source] = 1

import scipy.io as sio
sio.savemat('A.mat', dict(A=A))
def test_pairwise_distances_argmin_min():
    # Check pairwise minimum distances computation for any metric
    X = [[0], [1]]
    Y = [[-2], [3]]

    Xsp = dok_matrix(X)
    Ysp = csr_matrix(Y, dtype=np.float32)

    expected_idx = [0, 1]
    expected_vals = [2, 2]
    expected_vals_sq = [4, 4]

    # euclidean metric
    idx, vals = pairwise_distances_argmin_min(X, Y, metric="euclidean")
    idx2 = pairwise_distances_argmin(X, Y, metric="euclidean")
    assert_array_almost_equal(idx, expected_idx)
    assert_array_almost_equal(idx2, expected_idx)
    assert_array_almost_equal(vals, expected_vals)
    # sparse matrix case
    idxsp, valssp = pairwise_distances_argmin_min(Xsp, Ysp, metric="euclidean")
    assert_array_almost_equal(idxsp, expected_idx)
    assert_array_almost_equal(valssp, expected_vals)
    # We don't want np.matrix here
    assert_equal(type(idxsp), np.ndarray)
    assert_equal(type(valssp), np.ndarray)

    # euclidean metric squared
    idx, vals = pairwise_distances_argmin_min(X,
                                              Y,
                                              metric="euclidean",
                                              metric_kwargs={"squared": True})
    assert_array_almost_equal(idx, expected_idx)
    assert_array_almost_equal(vals, expected_vals_sq)

    # Non-euclidean scikit-learn metric
    idx, vals = pairwise_distances_argmin_min(X, Y, metric="manhattan")
    idx2 = pairwise_distances_argmin(X, Y, metric="manhattan")
    assert_array_almost_equal(idx, expected_idx)
    assert_array_almost_equal(idx2, expected_idx)
    assert_array_almost_equal(vals, expected_vals)
    # sparse matrix case
    idxsp, valssp = pairwise_distances_argmin_min(Xsp, Ysp, metric="manhattan")
    assert_array_almost_equal(idxsp, expected_idx)
    assert_array_almost_equal(valssp, expected_vals)

    # Non-euclidean Scipy distance (callable)
    idx, vals = pairwise_distances_argmin_min(X,
                                              Y,
                                              metric=minkowski,
                                              metric_kwargs={"p": 2})
    assert_array_almost_equal(idx, expected_idx)
    assert_array_almost_equal(vals, expected_vals)

    # Non-euclidean Scipy distance (string)
    idx, vals = pairwise_distances_argmin_min(X,
                                              Y,
                                              metric="minkowski",
                                              metric_kwargs={"p": 2})
    assert_array_almost_equal(idx, expected_idx)
    assert_array_almost_equal(vals, expected_vals)

    # Compare with naive implementation
    rng = np.random.RandomState(0)
    X = rng.randn(97, 149)
    Y = rng.randn(111, 149)

    dist = pairwise_distances(X, Y, metric="manhattan")
    dist_orig_ind = dist.argmin(axis=0)
    dist_orig_val = dist[dist_orig_ind, range(len(dist_orig_ind))]

    dist_chunked_ind, dist_chunked_val = pairwise_distances_argmin_min(
        X, Y, axis=0, metric="manhattan")
    np.testing.assert_almost_equal(dist_orig_ind, dist_chunked_ind, decimal=7)
    np.testing.assert_almost_equal(dist_orig_val, dist_chunked_val, decimal=7)

    # Test batch_size deprecation warning
    assert_warns_message(DeprecationWarning,
                         "version 0.22",
                         pairwise_distances_argmin_min,
                         X,
                         Y,
                         batch_size=500,
                         metric='euclidean')
Beispiel #49
0
def deviceid_app():
    
    deviceid_packages=pd.read_csv(file_path+'deviceid_packages.csv')
    
    def app_list(text):
        app_list=text.split('|')
#        print (app_list)
        return app_list
    deviceid_packages['add_list'] = deviceid_packages['add_id_list'].apply(app_list).tolist()

    # Count how many distinct devices (users) and apps (events) appear in the training set
    unique_deviceid = set(deviceid_packages['device_id'].values.tolist())
    unique_app = set(app for apps in deviceid_packages['add_list'] for app in apps)
    
    n_unique_deviceid = len(unique_deviceid)
    n_unique_app = len(unique_app)
    
    print("number of uniqueUsers :%d" % n_unique_deviceid)
    print("number of uniqueEvents :%d" % n_unique_app)
    
    # User relation matrix, usable as input to later LFM/SVD++ processing
    # It is a sparse matrix recording which apps (events) each device (user) is interested in
    dev_app_Scores = ss.dok_matrix((n_unique_deviceid, n_unique_app))
    dev_Index = dict()
    app_Index = dict()
    
    # Re-encode the user index dictionary
    for i, u in enumerate(unique_deviceid):
        dev_Index[u] = i
        
    # Re-encode the event index dictionary
    for i, e in enumerate(unique_app):
        app_Index[e] = i
        
        
    # Track the events each user attends / the users attending each event
    appFordev = defaultdict(set)
    devForapp = defaultdict(set)
    
    
    n_records = 0
    train_dict=deviceid_packages.loc[:,['device_id','add_list']].to_dict(orient='records')
    for line in train_dict:
        device_id = line.get('device_id','')
        app_list = line.get('add_list','')
        for app in app_list:
            i = dev_Index[device_id]  # user
            j = app_Index[app]        # event

            appFordev[i].add(j)    # this user attended this event
            devForapp[j].add(i)    # this event was attended by the user
            dev_app_Scores[i, j] = 1
            
  
    ## Events each user attends; later used so that events attended by a user's friends influence the user
    pk.dump(devForapp, open("devForapp.pkl", 'wb'))
    ## Users attending each event
    pk.dump(appFordev, open("appFordev.pkl", 'wb'))

    # Save the user-event relation matrix R for later use
    sio.mmwrite("dev_app_Scores", dev_app_Scores)

    # Save the user index table
    pk.dump(dev_Index, open("dev_Index.pkl", 'wb'))
    # Save the event index table
    pk.dump(app_Index, open("app_Index.pkl", 'wb'))


    # To avoid unnecessary computation, we first find all related users and related events.
    # A related user pair means two users that acted on at least one common event.
    # A related event pair means two events on which at least one common user acted.
    unique_dev_Pairs = set()
    unique_app_Pairs = set()
    for app in unique_app:
        j = app_Index[app]
        devs = devForapp[j]
        if len(devs) > 2:
            unique_dev_Pairs.update(itertools.combinations(devs, 2))

    for deviceid in unique_deviceid:
        i = dev_Index[deviceid]
        apps = appFordev[i]
        if len(apps) > 2:
            unique_app_Pairs.update(itertools.combinations(apps, 2))
     
    # Save the user-event pair index tables
    pk.dump(unique_dev_Pairs, open("unique_dev_Pairs.pkl", 'wb'))
    pk.dump(unique_app_Pairs, open("unique_app_Pairs.pkl", 'wb'))
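A compact sketch of the pair enumeration above: for every event, the set of attending users yields candidate related-user pairs (the data below is made up).

import itertools

devForapp_demo = {0: {1, 2, 3}, 1: {2, 4}}    # event index -> set of user indices
dev_pairs_demo = set()
for devs in devForapp_demo.values():
    if len(devs) > 2:
        dev_pairs_demo.update(itertools.combinations(sorted(devs), 2))
print(dev_pairs_demo)    # {(1, 2), (1, 3), (2, 3)}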
Beispiel #50
0
    print 'done with round 1'
    sys.stdout.flush()

    if not fix_vocab:
        vocab = [w for w in vocab if vocab[w] > 40]
        inv_vocab = dict(zip(vocab, xrange(len(vocab))))
    else:
        vocab, inv_vocab, _, = pickle.load(file('vocab.pk'))

    #for pat in pool.imap_unordered(realPatient, real_patient_generator(src=xml_src, max_patients=max_patients), chunksize=100):
    for n, pat in enumerate(
            real_patient_generator(src=xml_src, max_patients=max_patients)):
        pat = realPatient(pat)
        txt = set(pat['Text'].split('|'))
        m = sparse.dok_matrix((1, len(vocab)))
        for w in txt:
            if w in inv_vocab:
                m[0, inv_vocab[w]] = 1
        pat['sparse_X'] = m
        index = pat['index']
        if n % 100 == 0:
            print n
            sys.stdout.flush()

        visitShelf[index] = pat

    print 'done with round 2'
    sys.stdout.flush()

    visitShelf.close()
    def construct_linear_system(self):
        """
        construct the sparse matrix
        more algorithms are available if the matrix is symmetric, which
        requires evaluating the Dirichlet BCs rather than putting them in the
        matrix
        """
        N = self.grid.Ncells()
        Nbc = len(self.dirichlet_bcs)
        self.Ncalc = Ncalc = N - Nbc

        # map cells to forced values
        dirichlet = dict([(c, v) for c, v, xy in self.dirichlet_bcs])

        self.is_calc_c = is_calc_c = np.ones(N, np.bool8)
        for c, v, xy in self.dirichlet_bcs:
            is_calc_c[c] = False

        # is_calc_c[self.c_mask] = False

        # c_map is indexed by real cell indices, and returns the matrix index
        c_map = self.c_map = np.zeros(N, np.int32)
        self.c_map[is_calc_c] = np.arange(Ncalc)

        dzc = self.dzc
        dzf = self.dzf
        area_c = self.area_c

        meth = 'coo'  # 'dok'
        if meth == 'dok':
            A = sparse.dok_matrix((Ncalc, Ncalc), np.float64)
        else:
            # construct the matrix from a sequence of indices and values
            ij = []
            values = []  # successive values for the same i,j will be summed

        b = np.zeros(Ncalc, np.float64)
        flux_per_gradient_j = -self.K_j * self.l_j * dzf / self.d_j * self.dt

        self.grid.edge_to_cells()  # makes sure that edges['cells'] exists.

        for j in range(self.grid.Nedges()):
            if self.grid.edges['cells'][j, 1] < 0:
                continue  # boundary edge
            else:
                e = self.grid.edges[j]

            flux_per_gradient = flux_per_gradient_j[j]

            # this is the desired operation:
            #  Cdiff[ic1] -= flux_per_gradient / (An[ic1]*dzc) * (C[ic2] - C[ic1])
            #  Cdiff[ic2] += flux_per_gradient / (An[ic2]*dzc) * (C[ic2] - C[ic1])
            # Where Cdiff is row, C is col
            ic1, ic2 = e['cells']

            if is_calc_c[ic1] and is_calc_c[ic2]:
                mic2 = c_map[ic2]
                mic1 = c_map[ic1]
                v1 = flux_per_gradient / (area_c[ic1] * dzc[ic1])
                v2 = flux_per_gradient / (area_c[ic2] * dzc[ic2])

                if meth == 'dok':
                    A[mic1, mic2] -= v1
                    A[mic1, mic1] += v1
                    A[mic2, mic2] += v2
                    A[mic2, mic1] -= v2
                else:
                    ij.append((mic1, mic2))
                    values.append(-v1)
                    ij.append((mic1, mic1))
                    values.append(v1)
                    ij.append((mic2, mic2))
                    values.append(v2)
                    ij.append((mic2, mic1))
                    values.append(-v2)

            elif not (is_calc_c[ic1] or is_calc_c[ic2]):
                # both are dirichlet, so nothing to do
                pass
            elif not is_calc_c[ic2]:
                mic1 = c_map[ic1]
                v = flux_per_gradient / (self.area_c[ic1] * dzc[ic1])
                if meth == 'dok':
                    A[mic1, mic1] += v
                else:
                    ij.append((mic1, mic1))
                    values.append(v)

                # roughly
                # A[1,1]*x[1] + A[1,2]*x[2] + ... = b[1]
                # but we already know x[2],
                # A[1,1]*x[1] + ... = b[1] - A[1,2]*x[2]
                # so flip the sign, multiply by known dirichlet value, and
                # add to the RHS
                b[mic1] += flux_per_gradient / (area_c[ic1] *
                                                dzc[ic1]) * dirichlet[ic2]
            else:  # not is_calc_c[c1]
                mic2 = c_map[ic2]
                # A[mic2,mic2] += flux_per_gradient / (area_c[ic2]*dzc[ic2])
                # A[mic2,mic1] -= flux_per_gradient / (area_c[ic2]*dzc[ic2])

                # A[mic2,mic2]*x[2] + A[mic2,mic1]*x[1] = b[2]
                # ...
                # A[mic2,mic2]*x[2] - flux_per_gradient / (area_c[ic2]*dzc[ic2])*x[1] = b[2]
                # ...
                # A[mic2,mic2]*x[2]  = b[2] + flux_per_gradient / (area_c[ic2]*dzc[ic2])*x[1]
                v = flux_per_gradient / (area_c[ic2] * dzc[ic2])
                if meth == 'dok':
                    A[mic2, mic2] += v
                else:
                    ij.append((mic2, mic2))
                    values.append(v)
                b[mic2] += flux_per_gradient / (area_c[ic2] *
                                                dzc[ic2]) * dirichlet[ic1]

        if not (np.isscalar(self.alpha) and self.alpha == 0):
            for c in range(N):
                if self.is_calc_c[c]:
                    mic = self.c_map[c]
                    v = self.alpha[c] * self.dt
                    if meth == 'dok':
                        A[mic, mic] -= v
                    else:
                        ij.append((mic, mic))
                        values.append(-v)

        # Flux boundary conditions:
        for ic, value, xy in self.neumann_bcs:
            mic = c_map[ic]
            # make mass/time into concentration/step
            # arrived at minus sign by trial and error.
            b[mic] -= value / (area_c[ic] * dzc[ic]) * self.dt

        if meth == 'dok':
            self.A = sparse.coo_matrix(A)
        else:
            ijs = np.array(ij, dtype=np.int32)
            data = np.array(values, dtype=np.float64)
            A = sparse.coo_matrix((data, (ijs[:, 0], ijs[:, 1])),
                                  shape=(Ncalc, Ncalc))
            self.A = A

        # report scale to get a sense of whether dt is too large
        Ascale = A.diagonal().min()
        log.debug("Ascale is %s" % Ascale)

        self.b = b
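As noted in the comment above, repeated (i, j) index pairs are summed when the COO matrix is assembled; a small sketch of that behaviour with made-up entries:

import numpy as np
from scipy import sparse

ij = [(0, 0), (0, 0), (1, 1)]                  # (0, 0) appears twice
values = [1.0, 2.0, 5.0]
ijs = np.array(ij, dtype=np.int32)
data = np.array(values, dtype=np.float64)
A_demo = sparse.coo_matrix((data, (ijs[:, 0], ijs[:, 1])), shape=(2, 2))
print(A_demo.toarray())                        # entry (0, 0) holds 1.0 + 2.0 = 3.0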
Beispiel #52
0
                proportion_lower = 1. - proj % 1
                A[x + y * shape[0],
                  offset + lower_sensor_pixel] += proportion_lower
                if upper_sensor_pixel != lower_sensor_pixel:
                    proportion_upper = 1. - proportion_lower
                    A[x + y * shape[0],
                      offset + upper_sensor_pixel] += proportion_upper

    return A


if __name__ == "__main__":
    image_y = np.load("hs_tomography_2/y_77_.npy")
    image_alphas = np.load("hs_tomography_2/y_77_alphas.npy").astype("float")

    image_flattened = image_y.flatten()
    c = np.array([-77, -33, -12, -3, 21, 42, 50, 86]).astype("float")

    a = makeA_jens((77, 77), image_alphas).transpose()

    import matplotlib.pyplot as plt
    plt.imshow(a.transpose(), cmap="gray", interpolation="none")
    plt.close()
    # plt.show()
    a_sparse = dok_matrix(a)
    res = lsqr(a_sparse, image_y)[0]  # lsqr returns a tuple; the solution vector is its first element
    res_new = res.reshape((77, 77))
    plt.imshow(res_new, cmap="gray")
    plt.show()
    IPython.embed()
def build(um_dict, output_filename, latent_factors, wnmf_iterations,
          user_id_dict, business_id_dict):
    print('id dicts loading')
    with open(user_id_dict, 'r') as f:
        user_id_dict = json.load(f)
    with open(business_id_dict, 'r') as f:
        business_id_dict = json.load(f)
    print('loading um')
    um_dok = sps.dok_matrix((len(user_id_dict), len(business_id_dict)),
                            dtype=np.int8)
    for key_i, value_i in um_dict.items():
        for key_j, value_j in value_i.items():
            um_dok[user_id_dict[key_i], business_id_dict[key_j]] = value_j
    a = um_dok.tocsr()
    missing_u, missing_b = find_missing(set(user_id_dict.keys()),
                                        set(business_id_dict.keys()), um_dict)
    del um_dict
    u = np.random.random(size=(len(user_id_dict), latent_factors))
    v = np.random.random(size=(latent_factors, len(business_id_dict)))
    for i in missing_u:
        u[user_id_dict[i], :] = 0
    for i in missing_b:
        v[:, business_id_dict[i]] = 0
    np.set_printoptions(threshold=np.inf)
    print(u)

    #get nonzero rows, columns
    x, y = a.nonzero()

    #copy um_csr into new sparse matrix
    w = a.copy()
    for i, j in zip(x, y):
        w[i, j] = 1

    i = 0
    prev_norm = 0
    curr_norm = 0
    change = 999999
    print('starting wnmf loop')
    while (i < wnmf_iterations and change > 2):
        print('iteration ' + str(i))
        vt = v.transpose()
        u_num = a * vt
        u_denom = w.multiply(np.matmul(u, v)) * vt
        for ui in range(np.size(u, 0)):
            for uj in range(np.size(u, 1)):
                # print("Old u " + str(ui) + ', ' + str(uj) + ': ' + str(u[ui, uj]))
                #u_denom = w[ui, :].multiply(u[ui, :] * v) * vt[:, uj]
                u[ui, uj] = u[ui, uj] * (u_num[ui, uj] /
                                         (u_denom[ui, uj] + 0.0000001))
                # print("New u " + str(ui) + ', ' + str(uj)  + ': ' + str(u[ui, uj]))
        ut = u.transpose()
        v_num = ut * a
        v_denom = ut * w.multiply(np.matmul(u, v))
        for vi in range(np.size(v, 0)):
            for vj in range(np.size(v, 1)):
                # print("Old v " + str(vi) + ', ' + str(vj) + ': ' + str(v[vi, vj]))
                #v_denom = ut * w[:, vj].multiply(u * v[:, vj])
                v[vi, vj] = v[vi, vj] * (v_num[vi, vj] /
                                         (v_denom[vi, vj] + 0.0000001))
                # print("New v" + str(vi) + ', ' + str(vj)  + ': ' + str(v[vi, vj]))
        # print('U:')
        # print(u)
        # print('V:')
        # print(v)
        i += 1

        # This takes two matrices, multiplies by weight, subtracts them, and then finds its norm2

        uv = np.matmul(u, v)
        uv = w.multiply(uv)

        norm = a - uv
        norm = norm.power(2)
        norm = norm.sum()
        norm = math.sqrt(norm)

        prev_norm = curr_norm
        curr_norm = norm
        change = math.fabs(curr_norm - prev_norm)
        print(change)
    u = pd.DataFrame(u)
    v = pd.DataFrame(v)
    u.to_csv((output_filename + 'u.csv'))
    v.to_csv((output_filename + 'v.csv'))
    log_data = str(i) + ',' + str(change)
    return u, v, log_data
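The norm computed at the end of each iteration above is the weighted reconstruction error ||W * (A - UV)||_F (elementwise weighting), where only observed entries carry weight 1. A toy sketch of that computation with made-up matrices:

import math
import numpy as np
import scipy.sparse as sps

a_demo = sps.csr_matrix(np.array([[5.0, 0.0], [0.0, 3.0]]))
w_demo = a_demo.copy()
w_demo.data[:] = 1.0                            # weight 1 on observed entries only
u_demo = np.random.random((2, 1))
v_demo = np.random.random((1, 2))
uv_demo = w_demo.multiply(np.matmul(u_demo, v_demo))   # mask the reconstruction
err = math.sqrt((a_demo - uv_demo).power(2).sum())
print(err)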
        tags = package_keywords_dict.get(each_dep, [])
        current_tags.extend(tags)
    current_tags_index_wise = [
        map_keywords_dict[keyword] for keyword in current_tags
    ]
    rating_matrix_dict[map_packages_dict[package_name]] = list(
        set(current_tags_index_wise))

print("Delete apckage/dep/key dict")
del (package_dependencies_dict)
del (package_keywords_dict)

print("Generate Sparse Matrix using the Package_Aggregated_Tags_Dict")
# Generate Sparse Matrix using the Package_Aggregated_Tags_Dict

sparse_mat = sp.dok_matrix((len(map_packages_dict), len(map_keywords_dict)),
                           dtype=np.int64)
for package_id, tag_ids in rating_matrix_dict.items():
    sparse_mat[package_id, tag_ids] = 1

print("Generate Sparse Coordinate matrix")
# Generate the Sparse Coordinate Matrix
sparse_coo = sparse_mat.tocoo()
# print("Delete sparse mat temp")
# del(sparse_mat)
indices = np.mat([sparse_coo.row, sparse_coo.col]).transpose()

print("Genearte the Sparse Tensor using Sparse Coo. Matrix")
# Genearte the Sparse Tensor using Sparse Coo. Matrix
rating_matrix = tf.SparseTensor(indices, sparse_coo.data, sparse_coo.shape)
print("Size of content matrix = {}".format(rating_matrix.get_shape()))
Beispiel #55
0
    def getAMatrix(self):
        '''A is a sparse matrix consisting of response functions. It has
        dimensions #Healpixels by #QSO pairs.'''

        A = dok_matrix((self.Np, self.Nd), dtype=np.float32)
        #sigma=np.sqrt(4*np.pi/(12*self.Nside**2))
        sigma = (np.sqrt(4 * np.pi /
                         (12 * self.Nside**2))) / self.cutoff  ## SY 8/3/19
        print("sigma=", sigma / np.pi * 180, 'deg')

        ## we loop over pixels
        for i, hpix in enumerate(self.pixid):
            ## first find nearby healpixels
            theta, phi = self.pixtheta[i], self.pixphi[i]
            mvec = (sin(theta) * cos(phi), sin(theta) * sin(phi), cos(theta))
            neipixels = hp.query_disc(self.Nside, mvec, self.sradius)
            assert (hpix in neipixels)
            B = np.zeros(self.Nd, bool)
            for neipix in neipixels:
                B = B | (neipix == self.d.hi1) | (neipix == self.d.hi2)
            s = np.where(B)[0]
            ### so at this point we have for map pixel i, the list of data
            ### pixels that are close enough to matter.
            ### we need to loop over them and get the relevant matrix elements
            qtheta1 = self.q.theta[self.d.i1[s]]
            qphi1 = self.q.phi[self.d.i1[s]]
            qtheta2 = self.q.theta[self.d.i2[s]]
            qphi2 = self.q.phi[self.d.i2[s]]
            ## we are going to employ 3 vectors as a foolproof method
            dx1 = sin(qtheta1) * cos(qphi1) - mvec[0]
            dy1 = sin(qtheta1) * sin(qphi1) - mvec[1]
            dz1 = cos(qtheta1) - mvec[2]
            dr1 = np.sqrt(dx1**2 + dy1**2 + dz1**2)
            ## we have response1 in direction dr1 (normalised by pixel area)
            response1 = (1 / dr1) * (1 - exp(-dr1**2 / (2 * sigma**2)))  ## SY
            ## ditto for q2
            dx2 = sin(qtheta2) * cos(qphi2) - mvec[0]
            dy2 = sin(qtheta2) * sin(qphi2) - mvec[1]
            dz2 = cos(qtheta2) - mvec[2]
            dr2 = np.sqrt(dx2**2 + dy2**2 + dz2**2)
            ## we have response2 in direction dr2
            response2 = (1 / dr2) * (1 - exp(-dr2**2 / (2 * sigma**2)))  ## SY
            ## now we take the difference
            dxr = dx1 * response1 - dx2 * response2
            dyr = dy1 * response1 - dy2 * response2
            dzr = dz1 * response1 - dz2 * response2
            ## the difference in vector
            dx = dx1 - dx2
            dy = dy1 - dy2
            dz = dz1 - dz2
            ## total response is movevement/distance
            totresponse = (dxr * dx + dyr * dy +
                           dzr * dz) / (dx * dx + dy * dy + dz * dz)
            totresponse *= np.sqrt(
                self.d.weight[s])  ## we downweigh response by weight
            A[i, s] = totresponse
            if (i % 100 == 0):
                print(i)

        print("Transposing matrix.")
        A = A.transpose()
        print("A.tocsr")
        A = A.tocsr()
        return A
Beispiel #56
0
    def set_topology(self):
        """
        Use functions of unstructured grid class for remaining topology
        """
        self.nedges = self.grd.Nedges()
        self.ncells = self.grd.Ncells()
        self.nnodes = self.grd.Nnodes()
        self.grd.update_cell_edges()
        self.grd.update_cell_nodes()
        self.grd.edge_to_cells()
        self.grd.cells_area()
        self.grd.cells['_center'] = self.grd.cells_center()
        self.grd.edges['mark'] = 0  # default is internal cell
        self.extern = np.where(np.min(self.grd.edges['cells'], axis=1) < 0)[0]
        self.grd.edges['mark'][self.extern] = 1  # boundary edge
        self.intern = np.where(self.grd.edges['mark'] == 0)[0]
        self.nedges_intern = len(self.intern)  # number of internal edges
        self.exy = self.grd.edges_center()
        self.en = self.grd.edges_normals()
        self.len = self.grd.edges_length()
        # Reflect edge neighbors at boundaries
        ii = self.grd.edge_to_cells().copy()
        nc1 = ii[:, 0]
        nc2 = ii[:, 1]
        ii[:, 0] = np.where(ii[:, 0] >= 0, ii[:, 0], ii[:, 1])
        ii[:, 1] = np.where(ii[:, 1] >= 0, ii[:, 1], ii[:, 0])
        self.edge_to_cells_reflect = ii

        # number of valid sides for each cell
        self.ncsides = np.asarray(
            [sum(jj >= 0) for jj in self.grd.cells['edges']])

        # Used to be in prepare_to_run().
        # But anything that depends only on the grid should be here
        self.set_edge_cell_spacings()  # sets self.dc, dist, alpha
        self.sil = self.get_sign_array()

        # cell center values
        self.ei = np.zeros(self.ncells,
                           np.float64)  # water surface elevation, cells
        self.vi = np.zeros(self.ncells, np.float64)  # cell volumes
        self.pi = np.zeros(self.ncells, np.float64)  # cell wetted areas

        # edge values
        self.uj = np.zeros(self.nedges, np.float64)  # normal velocity at side
        self.qj = np.zeros(self.nedges,
                           np.float64)  # normal velocity*h at side
        self.aj = np.zeros(self.nedges, np.float64)  # edge wet areas
        self.cf = np.zeros(self.nedges, np.float64)  # edge friction coefs
        self.zj = np.zeros(self.nedges,
                           np.float64)  # edge depth -- to replace w/subgrid
        self.cfterm = np.zeros(
            self.nedges, np.float64)  # edge friction coefs - term for matrices

        # Matrix
        self.Ai = sparse.dok_matrix((self.ncells, self.ncells), np.float64)
        self.bi = np.zeros(self.ncells, np.float64)
        self.Ao = sparse.dok_matrix((self.ncells, self.ncells),
                                    np.float64)  # outer iterations
        self.bo = np.zeros(self.ncells, np.float64)
        self.x0 = np.zeros(self.ncells, np.float64)
                                          None,
                                          reduce_func=_reduce_func,
                                          working_memory=2**-16)
    assert isinstance(S_chunks, GeneratorType)
    S_chunks = list(S_chunks)
    assert len(S_chunks) > 1
    # atol is for diagonal where S is explicitly zeroed on the diagonal
    assert_allclose(np.vstack(S_chunks), S, atol=1e-7)


@pytest.mark.parametrize('good_reduce', [
    lambda D, start: list(D),
    lambda D, start: np.array(D),
    lambda D, start: csr_matrix(D),
    lambda D, start: (list(D), list(D)),
    lambda D, start: (dok_matrix(D), np.array(D), list(D)),
])
def test_pairwise_distances_chunked_reduce_valid(good_reduce):
    X = np.arange(10).reshape(-1, 1)
    S_chunks = pairwise_distances_chunked(X,
                                          None,
                                          reduce_func=good_reduce,
                                          working_memory=64)
    next(S_chunks)


@pytest.mark.parametrize(('bad_reduce', 'err_type', 'message'), [
    (lambda D, s: np.concatenate([D, D[-1:]]), ValueError,
     r'length 11\..* input: 10\.'),
    (lambda D, s: (D, np.concatenate([D, D[-1:]])), ValueError,
     r'length \(10, 11\)\..* input: 10\.'),
Beispiel #58
0
def RP_AddExonRemovePromoter(peaks_info, genes_info_full, genes_info_tss,
                             decay):
    """Multiple processing function to calculate regulation potential."""

    Sg = lambda x: 2**(-x)
    checkInclude = lambda x, y: all([x >= y[0], x <= y[1]])
    gene_distance = 15 * decay
    genes_peaks_score_array = sp_sparse.dok_matrix(
        (len(genes_info_full), len(peaks_info)), dtype=np.float64)
    peaks_info_inbody = []
    peaks_info_outbody = []

    w = genes_info_full + peaks_info
    A = {}

    w.sort()
    #     print(w[:100])
    for elem in w:
        if elem[-3] == 1:
            A[elem[-1]] = elem
        else:
            dlist = []
            for gene_name in list(A.keys()):
                g = A[gene_name]
                ### NOTE: main change here
                ### if peak center in the gene area
                if all([g[0] == elem[0], elem[1] >= g[1], elem[1] <= g[2]]):
                    ### if peak center in the exons
                    if any(
                            list(
                                map(checkInclude, [elem[1]] * len(g[5]),
                                    list(g[5])))):
                        genes_peaks_score_array[gene_name,
                                                elem[-1]] = 1.0 / g[-4]
                        peaks_info_inbody.append(elem)
                    ### if peak center in the promoter
                    elif checkInclude(elem[1], g[4]):
                        tmp_distance = abs(elem[1] - g[3])
                        genes_peaks_score_array[gene_name, elem[-1]] = Sg(
                            tmp_distance / decay)
                        peaks_info_inbody.append(elem)
                    ### intron regions
                    else:
                        continue
                else:
                    dlist.append(gene_name)
            for gene_name in dlist:
                del A[gene_name]

    ### remove genes in promoters and exons
    peaks_info_set = [tuple(i) for i in peaks_info]
    peaks_info_inbody_set = [tuple(i) for i in peaks_info_inbody]
    peaks_info_outbody_set = list(
        set(peaks_info_set) - set(peaks_info_inbody_set))
    peaks_info_outbody = [list(i) for i in peaks_info_outbody_set]

    print("peaks number: ", len(peaks_info_set))
    print("peaks number in gene promoters and exons: ",
          len(set(peaks_info_inbody_set)))
    print("peaks number out gene promoters and exons:",
          len(peaks_info_outbody_set))

    w = genes_info_tss + peaks_info_outbody
    A = {}

    w.sort()
    for elem in w:
        if elem[-3] == 1:
            A[elem[-1]] = elem
        else:
            dlist = []
            for gene_name in list(A.keys()):
                g = A[gene_name]
                tmp_distance = elem[1] - g[1]
                if all([g[0] == elem[0], tmp_distance <= gene_distance]):
                    genes_peaks_score_array[gene_name, elem[-1]] = Sg(
                        tmp_distance / decay)
                else:
                    dlist.append(gene_name)
            for gene_name in dlist:
                del A[gene_name]

    w.reverse()
    for elem in w:
        if elem[-3] == 1:
            A[elem[-1]] = elem
        else:
            dlist = []
            for gene_name in list(A.keys()):
                g = A[gene_name]
                tmp_distance = g[1] - elem[1]
                if all([g[0] == elem[0], tmp_distance <= gene_distance]):
                    genes_peaks_score_array[gene_name, elem[-1]] = Sg(
                        tmp_distance / decay)
                else:
                    dlist.append(gene_name)
            for gene_name in dlist:
                del A[gene_name]

    return (genes_peaks_score_array)
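A tiny numeric sketch of the exponential distance decay Sg used above (the decay length here is hypothetical): a peak exactly one decay-length away from the TSS contributes 2**(-1) = 0.5.

decay_demo = 10000                      # hypothetical decay length in bp
Sg = lambda x: 2**(-x)
for distance in (0, 5000, 10000, 30000):
    print(distance, Sg(distance / decay_demo))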
Beispiel #59
0
    def solve_via_data(self, data, warm_start, verbose, solver_opts, solver_cache=None):
        import cplex

        c = data[s.C]
        b = data[s.B]
        A = dok_matrix(data[s.A])
        # Save the dok_matrix.
        data[s.A] = A
        dims = dims_to_solver_dict(data[s.DIMS])

        n = c.shape[0]

        model = cplex.Cplex()
        variables = []
        # cpx_constrs will contain CpxConstr namedtuples (see above).
        cpx_constrs = []
        vtype = []
        if data[s.BOOL_IDX] or data[s.INT_IDX]:
            for i in range(n):
                # Set variable type.
                if i in data[s.BOOL_IDX]:
                    vtype.append('B')
                elif i in data[s.INT_IDX]:
                    vtype.append('I')
                else:
                    vtype.append('C')
        else:
            # If we specify types (even with 'C'), then the problem will
            # be interpreted as a MIP. Leaving vtype as an empty list
            # here, will ensure that the problem type remains an LP.
            pass
        # Add the variables in a batch
        variables = list(model.variables.add(
            obj=[c[i] for i in range(n)],
            lb=[-cplex.infinity]*n,  # default LB is 0
            ub=[cplex.infinity]*n,
            types="".join(vtype),
            names=["x_%d" % i for i in range(n)]))

        # Add equality constraints
        cpx_constrs += [_CpxConstr(_LIN, x)
                        for x in self.add_model_lin_constr(
                                model, variables,
                                range(dims[s.EQ_DIM]),
                                'E', A, b)]

        # Add inequality (<=) constraints
        leq_start = dims[s.EQ_DIM]
        leq_end = dims[s.EQ_DIM] + dims[s.LEQ_DIM]
        cpx_constrs += [_CpxConstr(_LIN, x)
                        for x in self.add_model_lin_constr(
                                model, variables,
                                range(leq_start, leq_end),
                                'L', A, b)]

        # Add SOC constraints
        soc_start = leq_end
        for constr_len in dims[s.SOC_DIM]:
            soc_end = soc_start + constr_len
            soc_constr, new_leq, new_vars = self.add_model_soc_constr(
                model, variables, range(soc_start, soc_end), A, b)
            cpx_constrs.append(_CpxConstr(_QUAD, soc_constr))
            cpx_constrs += [_CpxConstr(_LIN, x) for x in new_leq]
            variables += new_vars
            soc_start += constr_len

        # Set verbosity
        if not verbose:
            hide_solver_output(model)

        # For CVXPY, we set the qcpduals parameter here, but the user can
        # easily override it via the "cplex_params" solver option (see
        # set_parameters function).
        model.parameters.preprocessing.qcpduals.set(
            model.parameters.preprocessing.qcpduals.values.force)

        # Set parameters
        set_parameters(model, solver_opts)

        # Solve problem
        solution = {"model": model}
        try:
            start_time = model.get_time()
            model.solve()
            solution[s.SOLVE_TIME] = model.get_time() - start_time
        except Exception:
            pass

        return solution
def MottonenStatePreparation(state_vector, wires):
    r"""
    Prepares an arbitrary state on the given wires using a decomposition into gates developed
    by Möttönen et al. (Quantum Info. Comput., 2005).

    The state is prepared via a sequence
    of "uniformly controlled rotations". A uniformly controlled rotation on a target qubit is
    composed from all possible controlled rotations on said qubit and can be used to address individual
    elements of the state vector. In the work of Mottonen et al., the inverse of their state preparation
    is constructed by first equalizing the phases of the state vector via uniformly controlled Z rotations
    and then rotating the now real state vector into the direction of the state :math:`|0\rangle` via
    uniformly controlled Y rotations.

    This code is adapted from code written by Carsten Blank for PennyLane-Qiskit.

    Args:
        state_vector (array): Input array of shape ``(2^N,)``, where N is the number of wires
            the state preparation acts on. ``N`` must be smaller than or equal to the total
            number of wires.
        wires (Sequence[int]): sequence of qubit indices that the template acts on

    Raises:
        ValueError: if inputs do not have the correct format
    """

    ###############
    # Input checks
    wires, n_wires = _check_wires(wires)

    msg = "The state vector must be of size {}; got {}.".format(
        2**n_wires, len(state_vector))
    _check_shape(state_vector, (2**n_wires, ), msg=msg)

    # check if state_vector is normalized
    if isinstance(state_vector[0], Variable):
        state_vector_values = [s.val for s in state_vector]
        norm = np.sum(np.abs(state_vector_values)**2)
    else:
        norm = np.sum(np.abs(state_vector)**2)

    if not np.isclose(norm, 1.0, atol=1e-3):
        raise ValueError(
            "State vector probabilities have to sum up to 1.0, got {}".format(
                norm))
    #######################

    # Change ordering of indices, original code was for IBM machines
    state_vector = np.array(state_vector).reshape(
        [2] * n_wires).T.flatten()[:, np.newaxis]
    state_vector = sparse.dok_matrix(state_vector)

    wires = np.array(wires)

    a = sparse.dok_matrix(state_vector.shape)
    omega = sparse.dok_matrix(state_vector.shape)

    for (i, j), v in state_vector.items():
        if isinstance(v, Variable):
            a[i, j] = np.absolute(v.val)
            omega[i, j] = np.angle(v.val)
        else:
            a[i, j] = np.absolute(v)
            omega[i, j] = np.angle(v)
    # This code is directly applying the inverse of Carsten Blank's
    # code to avoid inverting at the end

    # Apply y rotations
    for k in range(n_wires, 0, -1):
        alpha_y_k = _get_alpha_y(a, n_wires, k)  # type: sparse.dok_matrix
        control = wires[k:]
        target = wires[k - 1]
        _uniform_rotation_y_dagger(alpha_y_k, control, target)

    # Apply z rotations
    for k in range(n_wires, 0, -1):
        alpha_z_k = _get_alpha_z(omega, n_wires, k)
        control = wires[k:]
        target = wires[k - 1]
        if len(alpha_z_k) > 0:
            _uniform_rotation_z_dagger(alpha_z_k, control, target)