Esempio n. 1
0
                     sys.exit(
                         "Error!!  Can't find reference sequence...")
         else:
             # Fallback branch: locate the reference sequence by searching the
             # full alignment (headers_full / sequences_full) for the PDB
             # sequence seq_pdb via sca.MSAsearch.  The returned index is stored
             # on options.i_ref and the matching header and sequence are echoed.
             #
             # NOTE(review): the bare `except:` below turns *any* failure
             # (including KeyboardInterrupt) into the same exit message.  In
             # addition, the SystemExit raised by sys.exit() here is itself
             # caught by the enclosing bare `except:` further down, which
             # replaces this message with the "PDBid or path" one.
             try:
                 print(
                     "Finding reference sequence using global MSAsearch..."
                 )
                 i_ref = sca.MSAsearch(headers_full, sequences_full,
                                       seq_pdb)
                 options.i_ref = i_ref
                 print("reference sequence index is: %i" % (i_ref))
                 print(headers_full[i_ref])
                 print(sequences_full[i_ref])
             except:
                 sys.exit("Error!!  Can't find reference sequence...")
         sequences, ats = sca.makeATS(sequences_full, ats_pdb, seq_pdb,
                                      i_ref, options.truncate)
         # Re-index the PDB distance matrix onto the alignment positions in
         # `ats`: entry [j, k] of the new matrix is the PDB distance between
         # the residues labelled ats[j] and ats[k].  The diagonal keeps the 0
         # that np.zeros() initialised it with.
         dist_new = np.zeros((len(ats), len(ats)))
         for (j, pos1) in enumerate(ats):
             for (k, pos2) in enumerate(ats):
                 if k != j:
                     if (pos1 == '-') or (pos2 == '-'):
                         # At least one position is labelled '-' in ats
                         # (presumably: no counterpart in the structure), so
                         # store the placeholder distance 1000.  This used to
                         # be `==`, a comparison whose result was discarded,
                         # which silently left these entries at 0.
                         dist_new[j, k] = 1000
                     else:
                         # list.index raises ValueError if the label is not
                         # present in ats_pdb; that propagates to the caller.
                         ix_j = ats_pdb.index(pos1)
                         ix_k = ats_pdb.index(pos2)
                         dist_new[j, k] = dist_pdb[ix_j, ix_k]
         dist_pdb = dist_new
     except:
         sys.exit("Error!!! Something wrong with PDBid or path...")
 elif options.refseq is not None:
     print("Finding reference sequence using provided sequence file...")
Esempio n. 2
0
	# Create the ATS
        i_ref = options.i_ref
        print "Reference sequence %i:" % (i_ref)
        print headers_full[i_ref]
        s_tmp = sequences_full[i_ref]

        try:
                f = open(options.refpos,'r')
                ats_tmp = [line.rstrip('\n') for line in f]
                print ats_tmp
                f.close()
        except:
                sys.exit("Error!! Unable to read reference positions!")
        try:
                sequences, ats = sca.makeATS(sequences_full, ats_tmp, s_tmp, i_ref)
        except:
                sys.exit("Error!! Unable to make ATS!")

	print "Final alignment parameters:"
	print "Number of positions: L = %i" % (len(s_tmp))
        print "Size of ats: %i" % len(ats)

	# saving the important stuff. Everything is stored in a file called [MSAname]_sequence.db. 
	path_list = options.alignment.split(os.sep)
	fn = path_list[-1]
	fn_noext = fn.split(".")[0]

	D = {}
	D['alg'] = sequences_full
	D['hd'] = headers_full
Esempio n. 3
0
                     print_("reference sequence index is: {:d}".format(i_ref))
                     print_(headers_full[i_ref])
                     print_(sequences_full[i_ref])
                 except:
                     sys.exit("Error!!  Can't find reference sequence...")
         else:
             # Fallback branch: locate the reference sequence by searching the
             # full alignment (headers_full / sequences_full) for the PDB
             # sequence seq_pdb via sca.MSAsearch.  The returned index is stored
             # on options.i_ref and the matching header and sequence are echoed.
             #
             # NOTE(review): the bare `except:` below turns *any* failure
             # (including KeyboardInterrupt) into the same exit message.  In
             # addition, the SystemExit raised by sys.exit() here is itself
             # caught by the enclosing bare `except:` further down, which
             # replaces this message with the "PDBid or path" one.
             try:
                 print_("Finding reference sequence using global MSAsearch...")
                 i_ref = sca.MSAsearch(headers_full, sequences_full, seq_pdb)
                 options.i_ref = i_ref
                 print_("reference sequence index is: {:d}".format(i_ref))
                 print_(headers_full[i_ref])
                 print_(sequences_full[i_ref])
             except:
                 sys.exit("Error!!  Can't find reference sequence...")
         sequences, ats = sca.makeATS(sequences_full, ats_pdb, seq_pdb, i_ref, options.truncate)
         # Re-index the PDB distance matrix onto the alignment positions in
         # `ats`: entry [j, k] of the new matrix is the PDB distance between
         # the residues labelled ats[j] and ats[k].  The diagonal keeps the 0
         # that np.zeros() initialised it with.
         dist_new = np.zeros((len(ats), len(ats)))
         for (j, pos1) in enumerate(ats):
             for (k, pos2) in enumerate(ats):
                 if k != j:
                     if (pos1 == '-') or (pos2 == '-'):
                         # At least one position is labelled '-' in ats
                         # (presumably: no counterpart in the structure), so
                         # store the placeholder distance 1000.  This used to
                         # be `==`, a comparison whose result was discarded,
                         # which silently left these entries at 0.
                         dist_new[j, k] = 1000
                     else:
                         # list.index raises ValueError if the label is not
                         # present in ats_pdb; that propagates to the caller.
                         ix_j = ats_pdb.index(pos1)
                         ix_k = ats_pdb.index(pos2)
                         dist_new[j, k] = dist_pdb[ix_j, ix_k]
         dist_pdb = dist_new
     except:
         sys.exit("Error!!! Something wrong with PDBid or path...")
 elif options.refseq is not None:
     print_("Finding reference sequence using provided sequence file...")
Esempio n. 4
0
    # Load the structure identified by options.pdbid / options.chainID and
    # derive from its residues the sequence with position labels (pdb_ats)
    # and the distance data.  The helpers are defined outside this excerpt.
    print('Collecting residue data from PDB')
    pdb_residues = get_pdb_residues(options.pdbid, options.chainID, QUIET=True)

    print('Parsing sequence and pdb_ats')
    pdb_sequence, pdb_ats = get_pdb_sequence(pdb_residues)

    print('Calculating distances')
    pdb_distances = get_pdb_distances(pdb_residues)

    # Choose the reference row of the position-filtered alignment by matching
    # it against the PDB sequence (per the log message this uses
    # Bio.pairwise2.align.globalxx -- TODO confirm in locate_reference).
    print("Finding reference sequence using Bio.pairwise2.align.globalxx")
    reference_index = locate_reference(position_filtered_sequences,
                                       pdb_sequence)

    print("Index of reference sequence: {}".format(reference_index))
    # Build the alignment (`sequences`) and its position labels (`ats`) from
    # the filtered alignment, the PDB labels/sequence and the reference index;
    # options.truncate is forwarded to sca.makeATS unchanged.
    sequences, ats = sca.makeATS(position_filtered_sequences, pdb_ats,
                                 pdb_sequence, reference_index,
                                 options.truncate)

    # Filter sequences and positions and compute the effective number of
    # sequences.  First report the sizes of the inputs going into filtering.
    # NOTE(review): len(sequences[0]) and len(pdb_distances[0]) raise
    # IndexError if either container is empty.
    print(
        "Conducting sequence and position filtering: alignment size is {} seqs, {} pos"
        .format(len(sequences), len(sequences[0])))
    print(f'ATS size: {len(ats)}')
    print(
        f'Dim Distance Matrix: {len(pdb_distances)} x {len(pdb_distances[0])}')

    # Sequence-level filter relative to the reference row.  PARAMETERS[1],
    # [2] and [3] are passed as the max gap fraction and the min / max
    # sequence-identity thresholds; the call returns the filtered alignment
    # (alg0) plus seqw0 and seqkeep (presumably per-sequence weights and the
    # indices of retained sequences -- confirm against sca.filterSeq).
    alg0, seqw0, seqkeep = sca.filterSeq(sequences,
                                         reference_index,
                                         max_fracgaps=PARAMETERS[1],
                                         min_seqid=PARAMETERS[2],
                                         max_seqid=PARAMETERS[3])