Example #1
def qubit2_conv_comp_base_rep_to_svd_diag_rep(rho: np.ndarray):
    a, b, t = qubit2_conv_comp_base_rep_to_hilbert_schmidt_rep(rho)
    u, s, vh = np.linalg.svd(t)
    if MF.check_matrix_symmetric(t):
        print(concurrency(rho))
    if MF.check_matrix_antisymmetric(t):
        print("nant")
    return np.dot(u.T, a).real, np.dot(vh, b).real, s, u, vh
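For orientation, the Hilbert-Schmidt (Fano) form this function diagonalizes is rho = (I x I + a.sigma x I + I x b.sigma + sum_ij t_ij sigma_i x sigma_j)/4, so the correlation tensor has entries t_ij = Tr(rho (sigma_i x sigma_j)) and s holds its singular values. A standalone numpy sketch, illustrative only and independent of the module, for the Bell state, where all three singular values come out as 1:

import numpy as np

pauli = [np.array([[0, 1], [1, 0]], dtype=complex),
         np.array([[0, -1j], [1j, 0]], dtype=complex),
         np.array([[1, 0], [0, -1]], dtype=complex)]
psi = np.array([1, 0, 0, 1], dtype=complex) / np.sqrt(2)  # (|00> + |11>)/sqrt(2)
rho = np.outer(psi, psi.conj())
t = np.array([[np.trace(rho @ np.kron(si, sj)).real
               for sj in pauli] for si in pauli])
u, s, vh = np.linalg.svd(t)
print(s)  # [1. 1. 1.] for a maximally entangled state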
Example #2
    def test_analyse_matrix_sparse(self):
        m1 = np.zeros((100, 100))
        m1[0][0] = 0.0004
        non_zero, N_total = MF.analyse_matrix_sparse(m1)
        self.assertTrue(non_zero == 1)
        self.assertTrue(N_total == 100 * 100)
        non_zero, N_total = MF.analyse_matrix_sparse(m1, rtol_places=2)
        self.assertTrue(non_zero == 0)
        m2 = np.ones((20, 20))
        non_zero, N_total = MF.analyse_matrix_sparse(m2, rtol_places=2)
        self.assertTrue(non_zero == N_total)
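The test pins down the contract of MF.analyse_matrix_sparse: return the number of entries that count as nonzero at the given tolerance, plus the total entry count. A plausible sketch; reading rtol_places as "decimal places to round to before counting" is an assumption inferred from the test, not from the module source:

import numpy as np

def analyse_matrix_sparse_sketch(m, rtol_places=None):
    """Count (nonzero entries, total entries), optionally rounding first."""
    if rtol_places is not None:
        m = np.round(m, rtol_places)  # 0.0004 rounds away at 2 places
    return int(np.count_nonzero(m)), m.size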
Example #3
def make_pure_random_2qbit_density_matrix_by_unitary_partial_trace2(p, n):
    """Create an ensemble of density matrices by first constructing an Seperable
    state of 8 uncoupled qubits, then applying a random unitary matrix and then
    tracing out to 2 qbits. The state should in principle be closer to the
    totally mixed state than in the procedure with only 4 uncoupled qubits"""
    N = 2**8
    rho = np.zeros((N, N)) + 0.j
    # np.kron is associative, so the eight nested calls reduce to a fold
    from functools import reduce
    rho += reduce(np.kron,
                  [make_random_1qubit_density_matrix(1) for _ in range(8)])
    U = MF.make_matrix_random_unitary(N, N)
    rho = np.dot(U, np.dot(rho, np.conjugate(np.transpose(U))))
    for _ in range(6):  # trace out one qubit per call: 2**8 -> 2**2
        rho = partial_trace(rho)
    return rho
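The six calls above imply that the module's partial_trace removes one qubit per call (2**8 down to 2**2). A standalone sketch of that convention, assuming the qubit traced out is the last one in the kron ordering:

import numpy as np

def partial_trace_last_qubit(rho):
    """Trace out the last qubit of an n-qubit density matrix (sketch)."""
    d = rho.shape[0] // 2
    r = rho.reshape(d, 2, d, 2)       # row index (i, k), column index (j, l)
    return np.einsum('ikjk->ij', r)   # sum over k = l: the last qubit's basis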
Example #4
def fitnessFunction(individual, algorithm, n_labels, ground_truth, m1, m2, m3):

    w1 = individual[0]
    w2 = individual[1]
    # w3 = individual[2]
    w3 = 0
    corr = mf.calculateCorrelationMatrix(m1, m2, m3, w1, w2, w3)

    if algorithm == 'complete':
        agglomerative = AgglomerativeClustering(affinity='precomputed', n_clusters=n_labels, linkage='complete').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'average':
        agglomerative = AgglomerativeClustering(affinity='precomputed', n_clusters=n_labels, linkage='average').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'kmedoids':
        _, clusters = km.kMedoids(corr, n_labels, 100)
        labels = km.sortLabels(clusters)
    else:
        raise ValueError('unknown algorithm: ' + algorithm)

    #fitness = metrics.homogeneity_score(labels, ground_truth)
    #fitness = metrics.adjusted_rand_score(labels, ground_truth)
    #fitness = sum(individual)
    # renamed to calinski_harabasz_score in newer scikit-learn
    fitness = metrics.calinski_harabaz_score(corr, labels)

    # drop references to the large matrices so they can be garbage-collected
    corr = None
    m1 = None
    m2 = None
    m3 = None

    return fitness
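A self-contained sanity check of the scoring path used above, on made-up two-cluster data; note that recent scikit-learn spells the score calinski_harabasz_score and renamed the affinity argument to metric in 1.2:

import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import calinski_harabasz_score

rng = np.random.default_rng(0)
points = np.vstack([rng.normal(0, 0.1, (10, 2)), rng.normal(1, 0.1, (10, 2))])
# precomputed pairwise distances, as fitnessFunction feeds to the clusterer
dist = np.linalg.norm(points[:, None] - points[None, :], axis=-1)
labels = AgglomerativeClustering(metric='precomputed',  # affinity= before 1.2
                                 n_clusters=2,
                                 linkage='complete').fit(dist).labels_
print(calinski_harabasz_score(dist, labels))  # larger = better-separated clusters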
Example #5
def mutual_information(p_x: np.ndarray, p_yx: np.ndarray) -> float:
    assert MF.check_matrix_simple_stochastic(p_yx), (
        "conditional matrix p_yx is not simple stochastic.")
    assert check_l1_norm(p_x), "p_x is not l1-normed"
    p_y = np.dot(np.transpose(p_yx), p_x)
    assert check_l1_norm(p_y), "p_y is not l1-normed"
    return classical_entropy(p_y) - conditional_entropy(p_x, p_yx)
Example #6
def create_random_ensemble_arcsin(N: int, K: int,
                                  size: int) -> List[np.ndarray]:
    """TODO: Need more verification.
    @see: ZyPeNeCo2011"""
    rho_list = []
    n = int(N / 2)
    phi1 = create_maximally_entangled_state(N, 0, 0)
    for i in range(size):
        U = np.zeros((N**2, N**2)) + 0.j
        for k in range(1, K):
            U += np.kron(np.eye(N), MF.make_matrix_random_unitary(N, N))
            #U += np.kron(MF.make_matrix_random_unitary(N, N), np.eye(N))
        phi2 = np.dot(U, phi1)

        psi = (phi1 + phi2)
        # vdot conjugates its first argument, giving the true l2 norm for complex psi
        psi /= np.sqrt(np.vdot(psi, psi).real)

        rho = density_matrix(psi)
        for k in range(n):
            rho = partial_trace(rho) / np.trace(rho)
        rho /= np.trace(rho)
        assert check_density_operator(rho), "rho is not a density matrix"
        rho_list.append(rho)

    return rho_list
Example #7
def conditional_entropy(p_x: np.ndarray, p_yx: np.ndarray) -> float:
    assert MF.check_matrix_simple_stochastic(p_yx), (
        "conditional matrix p_yx is not simple stochastic.")
    assert check_l1_norm(p_x), "p_x is not l1-normed"
    H = 0
    for i, p in enumerate(p_x):
        H += p * classical_entropy(p_yx[i])
    return H
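This helper and mutual_information (Example #5 above) together implement I(X;Y) = H(Y) - H(Y|X). A standalone check in plain numpy on the binary symmetric channel with uniform input and flip probability 0.1, where the result should be 1 - H2(0.1), about 0.531 bits (the module's classical_entropy may use a different log base):

import numpy as np

def h(p):  # Shannon entropy in bits of a probability vector
    p = p[p > 0]
    return float(-np.sum(p * np.log2(p)))

p_x = np.array([0.5, 0.5])                   # uniform input
p_yx = np.array([[0.9, 0.1], [0.1, 0.9]])    # binary symmetric channel
p_y = p_yx.T @ p_x                           # output distribution
h_y_given_x = sum(p * h(row) for p, row in zip(p_x, p_yx))
print(h(p_y) - h_y_given_x)                  # ~0.531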
Example #8
def create_random_ensemble_pure(N: int, n=4) -> List[np.ndarray]:
    # avoid shadowing the builtin 'list'; note that parameter n is unused
    rho_list = []
    for i in range(N):
        rho = np.kron(make_random_1qubit_density_matrix(1),
                      make_random_1qubit_density_matrix(1))
        U = MF.make_matrix_random_unitary(4, 4)
        rho = np.dot(U, np.dot(rho, np.conjugate(np.transpose(U))))
        rho_list.append(rho)
    return rho_list
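If make_random_1qubit_density_matrix(1) yields a pure state, as the naming suggests, each member is a pure product state conjugated by a unitary, so Tr(rho^2) should equal 1 throughout. A quick usage check, assuming the module functions above are importable:

ensemble = create_random_ensemble_pure(100)
purities = [np.trace(rho @ rho).real for rho in ensemble]
print(min(purities))  # should be 1.0 up to floating-point error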
Example #9
def make_pure_random_2qbit_density_matrix_by_unitary(p, n):
    rho = np.zeros((4, 4)) + 0.j
    pp = np.random.rand(n)
    pp = pp / np.sum(pp)
    for i in range(n):
        rho += np.kron(make_random_1qubit_density_matrix(p),
                       make_random_1qubit_density_matrix(p)) * pp[i]
    U = MF.make_matrix_random_unitary(4, 4)
    rho = np.dot(U, np.dot(rho, np.conjugate(np.transpose(U))))
    return rho
Example #10
def make_cue_matrix(N: int) -> np.ndarray:
    """Draw a CUE (circular unitary ensemble) matrix as the Q factor of the
    QR decomposition of a Ginibre matrix."""
    # Earlier Gaussian construction, kept for reference:
    # mu = 0
    # ep = 1/np.sqrt(N)
    # sigma = ep**2 / 8
    # K = np.random.normal(mu, sigma**0.5, (N, N)) + 0.j
    # H = K + np.transpose(K)
    m = MF.make_matrix_ginibre(N)
    H, R = np.linalg.qr(m)
    return H
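One known caveat of this construction: the raw Q factor from QR of a Ginibre matrix is not exactly Haar-distributed. The standard fix (Mezzadri, "How to generate random matrices from the classical compact groups") multiplies each column by the phase of the corresponding diagonal entry of R. A hedged sketch reusing the module's generator:

def make_cue_matrix_haar(N: int) -> np.ndarray:
    """Sketch: CUE matrix with the QR phase correction applied."""
    m = MF.make_matrix_ginibre(N)
    q, r = np.linalg.qr(m)
    d = np.diagonal(r)
    return q * (d / np.abs(d))  # column-wise phase fix gives Haar measure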
Example #11
def get_conditional_prob_from_joint_prob(p_xy: np.ndarray) -> np.ndarray:
    assert check_joint_probability_matrix(p_xy), (
        "p_xy is not a joint probability matrix.")
    size_a, size_b = p_xy.shape
    p_x = np.dot(p_xy, np.ones(size_b))
    assert check_l1_norm(p_x), "p_x is not l1-normed."
    # row-wise division; this copies, so the caller's joint matrix is untouched
    p_xy = p_xy / p_x[:, None]
    assert MF.check_matrix_simple_stochastic(p_xy), (
        "after calculation the matrix is not a conditional probability matrix")
    return p_xy
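The same normalization in two vectorized lines on a made-up 2x3 joint distribution (standalone, so the module's check_* asserts are omitted):

import numpy as np

p_xy = np.array([[0.10, 0.20, 0.10],
                 [0.30, 0.15, 0.15]])  # joint distribution, sums to 1
p_x = p_xy.sum(axis=1)                 # marginal of x
p_yx = p_xy / p_x[:, None]             # p(y|x): each row now sums to 1
print(p_yx.sum(axis=1))                # [1. 1.]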
Example #12
    def test_check_matrix_triagonal(self):
        m1 = MF.make_matrix_random_triagonal(3, 5)
        m2 = MF.make_matrix_random_triagonal(3, 5).T
        self.assertTrue(MF.check_matrix_triagonal(m1))
        self.assertTrue(MF.check_matrix_triagonal(m2))
        m3 = MF.make_matrix_random_unitary(5, 5)
        self.assertFalse(MF.check_matrix_triagonal(m3))
Example #13
def make_pure_random_2qbit_density_matrix_by_unitary_partial_trace(p, n):
    """Create an ensemble of density matrices by first constructing an Seperable
    state of 4 uncoupled qubits, then applying a random unitary matrix and then
    tracing out 2 qbits."""
    rho = np.zeros((16, 16)) + 0.j
    rho += np.kron(
        make_random_1qubit_density_matrix(1),
        np.kron(
            make_random_1qubit_density_matrix(1),
            np.kron(make_random_1qubit_density_matrix(1),
                    make_random_1qubit_density_matrix(1))))
    U = MF.make_matrix_random_unitary(16, 16)
    rho = np.dot(U, np.dot(rho, np.conjugate(np.transpose(U))))
    rho = partial_trace(partial_trace(rho))
    return rho
Example #14
def fitness(indv):
    w1, w2, w3 = indv.solution
    #return x + 10*sin(5*x) + 7*cos(4*x)
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)
    agglomerative = AgglomerativeClustering(affinity='precomputed',
                                            n_clusters=n_labels,
                                            linkage='complete').fit(corr)
    labels = agglomerative.labels_

    metrics = cl.clusterEvaluation(corr, labels, ground_truth)

    if metrics[0] <= 0:
        return 1  # floor non-positive scores so the GA still gets a usable fitness

    print(metrics[0] * 100)
    return float(metrics[0]) * 100
Example #15
def fitness(indv):
    w1, w2, w3 = indv.solution
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)
    if algorithm == 'complete':      
        agglomerative = AgglomerativeClustering(affinity='precomputed', n_clusters=n_labels, linkage='complete').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'average':
        agglomerative = AgglomerativeClustering(affinity='precomputed', n_clusters=n_labels, linkage='average').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'kmedoids':
        _, clusters = km.kMedoids(corr, n_labels, 100)
        labels = km.sortLabels(clusters)

    metrics = ce.clusterEvaluation(corr, labels, ground_truth)

    writer.write(str(current_iteration) + ': ' + str(w1) + ' ' + str(w2) + ' ' +
                 str(w3) + ' ' + ' '.join(str(x) for x in metrics) + '\n')
Example #16
def fitness(indv):
    w1, w2, w3 = indv.solution
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)
    if algorithm == 'complete':
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage='complete').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'average':
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage='average').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'kmedoids':
        _, clusters = km.kMedoids(corr, n_labels, 100)
        labels = km.sortLabels(clusters)

    metrics = ce.clusterEvaluation(corr, labels, ground_truth)

    return float(metrics[0]) * 100
Example #17
        sample_for_domains = spl
        sample = str(spl) + '.'

        matrix1 = rs.loadMatrixFromFile(sample, measure1)
        matrix2 = rs.loadMatrixFromFile(sample, measure2)
        matrix3 = rs.loadMatrixFromFile(sample, measure3)

        domains = rs.loadDomainListFromFile(sample)

        n_labels = scop.getUniqueClassifications(sample_for_domains)

        ground_truth = scop.getDomainLabels(domains)
        ground_truth = list(map(int, ground_truth))

        matrix1 = mf.minMaxScale(matrix1)
        matrix2 = mf.minMaxScale(matrix2)
        matrix3 = mf.minMaxScale(matrix3)

        matrix1 = mf.calculateDistances(matrix1)
        matrix2 = mf.calculateDistances(matrix2)
        matrix3 = mf.calculateDistances(matrix3)

        for w1 in np.arange(0.05, 1.05, 0.05):

            w2 = 0
            w3 = 1 - w1

            corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1,
                                                 w2, w3)
Example #18
#####################################################
# LOAD PROTEIN DATA
#####################################################

path_to_results = 'C:/ShareSSD/scop/tests/'
measure1 = 'rmsd'
measure2 = 'gdt_2'
measure3 = 'seq'
algorithm = 'complete'
sample = 'a.1.'
sample_for_domains = 'a.1'
matrix1 = rs.loadMatrixFromFile(sample, measure1)
matrix2 = rs.loadMatrixFromFile(sample, measure2)
matrix3 = rs.loadMatrixFromFile(sample, measure3)
matrix1 = mf.minMaxScale(matrix1)
matrix2 = mf.minMaxScale(matrix2)
matrix3 = mf.minMaxScale(matrix3)
matrix1 = mf.calculateDistances(matrix1)
matrix2 = mf.calculateDistances(matrix2)
matrix3 = mf.calculateDistances(matrix3)
domains = rs.loadDomainListFromFile(sample_for_domains)
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)

#####################################################
# GENETIC ALGORITHM CONFIG
#####################################################

# Define population.
indv_template = DecimalIndividual(ranges=[(0.1, 0.9), (0.1, 0.9), (0.1, 0.9)],
                                  eps=[0.001, 0.001, 0.001])
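What typically follows this template in a gaft-based script, assuming DecimalIndividual comes from the gaft package; the operator choices and parameters below are illustrative, not the author's, and the fitness body mirrors the other fitness snippets in this listing (mf, cl, n_labels, ground_truth, AgglomerativeClustering):

from gaft import GAEngine
from gaft.components import Population
from gaft.operators import TournamentSelection, UniformCrossover, FlipBitMutation

population = Population(indv_template=indv_template, size=50).init()
engine = GAEngine(population=population,
                  selection=TournamentSelection(),
                  crossover=UniformCrossover(pc=0.8, pe=0.5),
                  mutation=FlipBitMutation(pm=0.1))

@engine.fitness_register
def fitness(indv):
    w1, w2, w3 = indv.solution
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)
    labels = AgglomerativeClustering(affinity='precomputed', n_clusters=n_labels,
                                     linkage=algorithm).fit(corr).labels_
    return float(cl.clusterEvaluation(corr, labels, ground_truth)[0])

engine.run(ng=100)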
Example #19
    def test_check_matrix_diagonal(self):
        m = np.eye(5)
        self.assertTrue(MF.check_matrix_diagonal(m))
        m[0][1] = 1
        self.assertFalse(MF.check_matrix_diagonal(m))
Example #20
def readDistances(sample, measure):
    path_to_matrix = 'C:/ShareSSD/scop/data_old/sim_' + sample + '_' + measure

    counter = 0
    matrix = []

    with open(path_to_matrix, 'r') as fp:

        size = 0
        domains = []

        #get number of structures
        line = fp.readline()
        while line:
            if 'PDB  :' in str(line):
                if '#' not in str(line):
                    size += 1
                    domain = str(line).strip().split()[-1].split('/')[-1]
                    domains.append(str(domain))
            if 'Distance records' in str(line):
                break
            line = fp.readline()

        #get distance matrix
        while line:
            if 'DIST :' in str(line):
                if '#' not in str(line):
                    # print(line)  # debug output, silenced
                    parsed = str(line).strip().split()
                    current_row = parsed[2]
                    value = float(parsed[4])

                    while current_row == parsed[2]:

                        matrix.append(value)
                        line = fp.readline()
                        parsed = str(line).strip().split()
                        value = float(parsed[4])

                    counter += 1

                    i = 0
                    while i < counter:
                        matrix.append(0)
                        i += 1
                    i = 0

                    matrix.append(value)

            line = fp.readline()

    counter += 1

    i = 0
    while i < counter:
        matrix.append(0)
        i += 1
    i = 0

    matrix = np.asmatrix(matrix)
    matrix = matrix.reshape(size, size - 1)
    n, _ = matrix.shape
    X0 = np.zeros((n, 1))
    matrix = np.hstack((X0, matrix))

    #np.savetxt("C:/ShareSSD/scop/data_old/matrix2.txt", matrix,delimiter=' ', newline='\n')

    #if 'rmsd' in path_to_matrix:
    #matrix = matrix/(matrix.max()/1)

    matrix = mf.symmetrizeMatrix(matrix)

    #if 'gdt' in path_to_matrix:
    #matrix = mf.processGDTMatrix(matrix)

    #np.savetxt("C:/ShareSSD/scop/data_old/matrix3.txt", matrix,delimiter=' ', newline='\n')

    return domains, matrix
Example #21
from sklearn.cluster import AgglomerativeClustering

# load protein data before loop
path_to_results = 'C:/ShareSSD/scop/clustering_results/'
measure1 = 'gdt_2'
measure2 = 'gdt_2'
measure3 = 'seq'
sample = 'a.3.'
sample_for_domains = 'a.3'

matrix1 = rs.loadMatrixFromFile(sample, measure1)
matrix2 = rs.loadMatrixFromFile(sample, measure2)
matrix3 = rs.loadMatrixFromFile(sample, measure3)

matrix1 = mf.minMaxScale(matrix1)
matrix2 = mf.minMaxScale(matrix2)
matrix3 = mf.minMaxScale(matrix3)

# try dividing by means and standard deviations

#matrix1 = mf.calculateDistances(matrix1)
#matrix2 = mf.calculateDistances(matrix2)
#matrix3 = mf.calculateDistances(matrix3)

domains = rs.loadDomainListFromFile(sample_for_domains)

# read existing labels
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)
Example #22
def readDistances(sample, measure):
    path_to_matrix = 'C:/ShareSSD/scop/data/sim_' + sample + '_' + measure
    path_to_values = 'C:/ShareSSD/scop/values_' + sample + '_' + measure

    counter = 0
    matrix = []

    with open(path_to_matrix, 'r') as fp:

        size = 0
        domains = []

        #get structures
        line = fp.readline()
        while line:
            if 'PDB  :' in str(line):
                if '#' not in str(line):
                    size += 1
                    domain = str(line).strip().split()[-1].split('/')[-1]
                    domains.append(str(domain))
            if 'Distance records' in str(line):
                break
            line = fp.readline()

        #get distance matrix
        while line:
            if 'DIST :' in str(line):
                if '#' not in str(line):
                    # print(line)  # debug output, silenced
                    parsed = str(line).strip().split()
                    current_row = parsed[2]
                    value = float(parsed[4])

                    while current_row == parsed[2]:

                        matrix.append(value)
                        line = fp.readline()
                        parsed = str(line).strip().split()
                        value = float(parsed[4])

                    counter += 1

                    i = 0
                    while i < counter:
                        matrix.append(0)
                        i += 1
                    i = 0

                    matrix.append(value)

            line = fp.readline()

    counter += 1

    i = 0
    while i < counter:
        matrix.append(0)
        i += 1
    i = 0

    matrix = np.asmatrix(matrix)
    matrix = matrix.reshape(size, size - 1)
    n, m = matrix.shape
    X0 = np.zeros((n, 1))
    matrix = np.hstack((X0, matrix))

    # np.savetxt("C:/ShareSSD/scop/data_old2/kernel_"+measure, matrix, delimiter=' ', newline='\n')

    # if 'rmsd' in path_to_matrix:
    #     matrix = matrix/(matrix.max()/1)

    # if 'gdt' in path_to_matrix:
    #     matrix = mf.processgdtmatrix(matrix)

    matrix = mf.symmetrizeMatrix(matrix)

    # save kernel values
    with open(path_to_values, 'w') as nf:
        for i in range(0, n):
            for j in range(0, m):
                nf.write(str(matrix[i, j]) + '\n')

    # np.savetxt("C:/ShareSSD/scop/data_old2/matrix_"+measure, matrix, delimiter=' ', newline='\n')
    matrix.dump('C:/ShareSSD/scop/matrix_' + sample + '_' + measure)
    return domains, matrix
Example #23
ground_truth = list(map(int, ground_truth))

#matrix1 = mf.minMaxScale(matrix1)
#matrix2 = mf.minMaxScale(matrix2)
#matrix3 = mf.minMaxScale(matrix3)

# matrix1 = mf.calculatedistances(matrix1)
# matrix2 = mf.calculatedistances(matrix2)
# matrix3 = mf.calculatedistances(matrix3)

#for w1 in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]:
w1 = 0.5
w2 = 0.5
w3 = 0

corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)

# Hierarchical
for link in ['complete', 'average']:
    agglomerative = AgglomerativeClustering(affinity='precomputed',
                                            n_clusters=n_labels,
                                            linkage=link).fit(corr)
    labels = agglomerative.labels_
    metrics = cl.clusterEvaluation(corr, labels, ground_truth)
    cl.saveResults(measure1, measure2, 'hierarchical_' + link, sample, metrics)

# K-Medoids
medoids, clusters = km.kMedoids(corr, n_labels, 100)
labels = km.sortLabels(clusters)
metrics = cl.clusterEvaluation(corr, labels, ground_truth)
cl.saveResults(measure1, measure2, 'kmedoids', sample, metrics)
Example #24
    def test_operation_matrix_commutation(self):
        m1 = np.array([[0, 1], [1, 0]]) + 0.j
        m2 = np.array([[1, 0], [0, -1]]) + 0.j
        m3 = np.array([[0, -1.j], [1.j, 0]])
        self.assertTrue(
            np.allclose(MF.operation_matrix_commutation(m1, m2), -2.j * m3))
Example #25
n = scop.getUniqueClassifications('a.1')

measure1 = 'seq'
measure2 = 'seq'
#measure2 = 'maxsub'

#read matrices
domains, matrix1 = ff.readDistances('a.1.', 'rmsd')
matrix1 = ff.loadMatrixFromFile('a.1.', measure1)
#matrix2 = matrix1
#domains, matrix2 = ff.readDistances('a.1.', measure2)
matrix2 = ff.loadMatrixFromFile('a.1.', measure2)

ground_truth = scop.getDomainLabels(domains)

matrix1 = mf.calculateDistances(matrix1, matrix1)
matrix2 = mf.calculateDistances(matrix2, matrix2)

for w1 in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]:

    corr = mf.calculateCorrelation(w1, matrix1, matrix2)

    for link in ['complete', 'average']:

        with open(path_to_results + 'euclhiearchical_' + link + '_' + str(w1) +
                  '_' + measure1 + '_' + measure2, 'w') as file:

            agglo = AgglomerativeClustering(affinity='precomputed',
                                            n_clusters=n, linkage=link).fit(corr)
            labels = agglo.labels_
            metrics = ce.clusterEvaluation(corr, labels, ground_truth)
            # note: metrics is computed but never written to `file`

            print(w1)
Example #26
from sklearn.cluster import AgglomerativeClustering

# load protein data before loop
path_to_results = '/home/pedro/Desktop/scop/clustering_results/'
measure1 = 'rmsd'

sample = 'a.1.'
sample_for_domains = 'a.1'
X = rs.loadMatrixFromFile(sample, measure1)

mean = 1.79197547637771
std_dev = 0.669382812243833

#X = (X - (mean/std_dev))
X = mf.minMaxScale(X)
X = mf.calculateDistances(X)

domains = rs.loadDomainListFromFile(sample_for_domains)

# read existing labels
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)

ground_truth = list(map(int, ground_truth))

X = np.asmatrix(X)

M, C = kmedoids.kMedoids(X, n_labels, 100)
Example #27
    def test_check_matrix_unitary(self):
        m1 = np.array([[1 + 1.j, 2, 3], [4, 5, 6]])
        m2 = MF.make_matrix_random_unitary(5, 6)

        self.assertFalse(MF.check_matrix_unitary(m1))
        self.assertTrue(MF.check_matrix_unitary(m2))
Example #28
from sklearn.cluster import AgglomerativeClustering

# load protein data before loop
path_to_results = 'C:/ShareSSD/scop/clustering_results/'
measure1 = 'rmsd'

sample = 'a.1.'
sample_for_domains = 'a.1'
X = rs.loadMatrixFromFile(sample, measure1)

mean = 1.79197547637771
std_dev = 0.669382812243833

#X = (X - (mean/std_dev))

X = mf.calculateDistances(X)

domains = rs.loadDomainListFromFile(sample)

# read existing labels
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)

ground_truth = list(map(int, ground_truth))

X = np.asmatrix(X)

agglomerative = AgglomerativeClustering(affinity='precomputed',
                                        n_clusters=n_labels,
                                        linkage='complete').fit(X)
Example #29
def make_random_density_matrix_from_ginibre(N: int) -> np.ndarray:
    """Creates a random density matrix of size N from a Ginibre matrix.
    @see MatrixFunctions.make_matrix_ginibre"""
    m = MF.make_matrix_ginibre(N)  # fixes the measure on density matrices
    m2 = np.dot(np.conjugate(m.T), m)  # Hermitian and positive semidefinite by construction
    return m2 / np.trace(m2)  # normalize to unit trace
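The construction can be verified standalone: for a Ginibre matrix G of i.i.d. complex Gaussians, rho = G^H G / Tr(G^H G) is Hermitian, positive semidefinite, and unit-trace by construction:

import numpy as np

rng = np.random.default_rng(0)
N = 4
g = (rng.standard_normal((N, N)) + 1j * rng.standard_normal((N, N))) / np.sqrt(2)
m2 = g.conj().T @ g                             # Hermitian, positive semidefinite
rho = m2 / np.trace(m2)                         # unit trace
print(np.trace(rho).real)                       # 1.0
print(np.linalg.eigvalsh(rho).min() >= -1e-12)  # True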
Example #30
def readSimilaritiesToMatrix(sample, measure):

    path_to_matrix = 'C:/ShareSSD/scop/data/values_' + sample + '_' + measure
    path_to_domains = 'C:/ShareSSD/scop/data/domains_' + sample

    counter = 0
    matrix = []
    row = []

    with open(path_to_matrix, 'r') as fp:

        domains = set()

        line = fp.readline()
        while line:

            if 'END' in line:
                break

            parsed = str(line).strip().split(' ')
            structure1 = parsed[0]
            structure2 = parsed[1]
            value = float(parsed[2])

            domains.add(structure1)
            domains.add(structure2)

            # add the respective amount of zeroes to the current row
            counter += 1
            i = 0
            while i < counter:
                row.append(0)
                i += 1
            i = 0

            # track the current structure and read its alignments
            current_row = parsed[0]
            while current_row == parsed[0] and line:
                row.append(value)
                line = fp.readline()
                if 'END' in line:
                    break
                parsed = str(line).strip().split(' ')
                # print(parsed)  # debug output, silenced
                value = float(parsed[2])

            matrix.append(row)
            row = []

        # line = fp.readline()  # dead statement: the file is already fully consumed here

    counter += 1

    i = 0
    while i < counter:
        row.append(0)
        i += 1
    i = 0
    matrix.append(row)

    matrix = np.asmatrix(matrix)

    # symmetrize and write results to file
    matrix = mf.symmetrizeMatrix(matrix)
    matrix = np.matrix(matrix)
    matrix.dump("C:/ShareSSD/scop/data/matrix_" + sample + '_' + measure)

    # write domain list to file
    if not os.path.isfile(path_to_domains):
        with open(path_to_domains, 'w') as nf:
            domains = list(domains)
            for domain in domains:
                nf.write(domain + '\n')
            nf.write('END')