Beispiel #1
0
def main():
    """Read 'COMP.txt', score every ordered sequence pair and run center star.

    The sequences returned by ``parse_fasta`` are iterated as keys and
    indexed by key. For every ordered pair of keys the negated
    ``align_score`` is stored (announced to the user as pairwise edit
    distances); that mapping is not used further inside this function.
    Afterwards a center is chosen with ``findCenterSeq`` and passed to
    ``centerStar_align`` together with all sequences.
    """
    print('Reading fasta file...')
    sequences = parse_fasta('COMP.txt')

    print('Computing pairwise edit distances...', end='', flush=True)
    pairwise = {}
    for first in sequences:
        for second in sequences:
            score = align_score(sequences[first], sequences[second])
            pairwise[(first, second)] = -score
    # Terminates the progress line that was printed without a newline.
    print()

    print('Performing center star alignment...', end='', flush=True)
    center_seq = findCenterSeq(sequences)
    profile = centerStar_align(center_seq, sequences)
def main():
    """Center-star align the sequences in 'COMP.txt', then save and print them.

    Reads the sequences with ``parse_fasta``, picks a center with
    ``findCenterSeq`` and aligns everything with ``centerStar_align``.
    The resulting mapping is written to 'center_start_alignment.txt' as one
    ``key: value`` line per entry, and each aligned value is printed on its
    own line.
    """
    print('Reading fasta file...')
    sequences = parse_fasta('COMP.txt')

    print('Performing center star alignment...', end='', flush=True)
    center = findCenterSeq(sequences)
    alignment = centerStar_align(center, sequences)
    # The progress message above is printed without a newline; end that line
    # so the first alignment row is not glued onto it.
    print()

    with open('center_start_alignment.txt', 'w') as f:
        f.write('\n'.join('{}: {}'.format(key, value)
                          for key, value in alignment.items()))

    for value in alignment.values():
        print(value)
Beispiel #3
0
def load_aa_positions(fasta, aa):
    '''
    Build an index of the positions of selected amino acids in each protein.

    Every (name, sequence) pair yielded by `parse.parse_fasta(fasta)` is
    scanned and each character contained in `aa` is recorded.

    Parameters:
        fasta: passed unchanged to `parse.parse_fasta`.
        aa: container of residue characters to index; membership is tested
            with `c in aa`.

    Returns:
        A long DataFrame with one row per hit and the columns `prot`
        (sequence name), `aa` (matched character) and `pos` (1-based position
        within the sequence). The per-protein frames are concatenated as-is,
        so the row index restarts for every protein. When `fasta` yields no
        sequences at all, an empty DataFrame with these three columns is
        returned.
    '''

    def build_aa_df(name, seq, aa):
        # One record per residue of `seq` that is listed in `aa`.
        records = [{'prot': name, 'aa': c, 'pos': i}
                   for i, c in enumerate(seq, start=1) if c in aa]
        return pd.DataFrame.from_records(records)

    frames = [build_aa_df(name, seq, aa)
              for name, seq in parse.parse_fasta(fasta)]
    if not frames:
        # pd.concat raises ValueError on an empty list of frames.
        return pd.DataFrame(columns=['prot', 'aa', 'pos'])
    return pd.concat(frames)
Beispiel #4
0
from parse import parse_fastq
from teitlib import *
from time import sleep
from time import time
import os
import re

from fnmatch import fnmatch

# Timestamp taken when the script starts.
start_time = time()
# Disabled code: the bare string below is never executed. It holds the
# FASTQ-loading variant that builds `seqs` from the two read files.
"""
seqs1 = parse_fastq('SRR11397721_1.fastq',438663)
seqs2 = parse_fastq('SRR11397721_2.fastq',438663)
seqs = seqs1 + seqs2
"""
# NOTE(review): parse_fasta is not imported explicitly above; presumably it
# is provided by `from teitlib import *` -- confirm.
# Each parse_fasta call is unpacked into exactly two values (presumably a
# header and a sequence -- verify against parse_fasta).
name, ref = parse_fasta("reference.fasta")
# Alternative inputs for aname/test0, currently disabled:
#aname, test0 = parse_fasta("aligned_reference.fasta")
aname,test0 = parse_fasta('align_seqs_1og2.fasta')
#aname, test0 = parse_fasta("align_seqs1.fasta")
# `seqs` holds one entry per line of the text file.
with open('nomatches_seqs1og2.txt') as f:
    seqs = f.read().splitlines()

pname,morning = parse_fasta('morning.fasta')
# Hard-coded nucleotide string (presumably a probe pattern to search for in
# the reads -- confirm).
p = "ATTCTGCTGTCAAATTACAGAATAATGAGCTTAG"
"""
matching = [i for i in seqs if ref.find(i) > -1]
c = 0
for i in range(len(matching)):
    if matching[i].find(p) >= 0:
        c+=1
print("c:",c)                        
Beispiel #5
0
import os
import re

from fnmatch import fnmatch

# Timestamp taken when the script starts.
start_time = time()
# Disabled code: the bare string below is never executed. It holds the
# FASTQ-loading variant that builds `seqs` from the two read files.
"""
seqs1 = parse_fastq('SRR11397721_1.fastq',438663)
seqs2 = parse_fastq('SRR11397721_2.fastq',438663)
seqs = seqs1 + seqs2
"""
# `seqs` holds one entry per line of the text file.
with open('nomatches_seqs1og2.txt') as f:
    seqs = f.read().splitlines()

# Alternative input for aname/test0, currently disabled:
#aname,test0 = parse_fasta('align_seqs_1og2.fasta')
# Each parse_fasta call is unpacked into exactly two values (presumably a
# header and a sequence -- verify against parse_fasta).
name, ref = parse_fasta("reference.fasta")
aname, test0 = parse_fasta("s100.fasta")


# aligns list of sequences to a reference
def align(Text, Pattern):
    """Overlay every pattern found in ``Text`` onto an all-``'x'`` template.

    Parameters:
        Text: reference string to align against.
        Pattern: sequence of strings; each one is located in ``Text`` with
            ``str.find``, so only its first occurrence is used.

    Returns:
        A string as long as ``Text``. Positions covered by the first
        occurrence of some pattern hold the matching characters; all other
        positions hold ``'x'``.
    """
    aligned = ["x"] * len(Text)
    for fragment in Pattern:
        index = Text.find(fragment)
        if index >= 0:
            # Same-length slice assignment: copy the fragment's characters in.
            aligned[index:index + len(fragment)] = fragment
    # Join the characters back into a str (there is no `string()` builtin).
    return "".join(aligned)


# returns all missing indices
Beispiel #6
0
# Command-line interface: a FASTA file, an XWalk output file and the
# cross-linkable residue(s) to index.
parser = argparse.ArgumentParser(description='''
    pScatter
    Interaction visualizer for pLink XLMS data, by Justin Jee (with design by Katelyn McGary Shipper)

    '''
    )

parser.add_argument('fasta_file', type=str, help='fasta file with protein sequences')
parser.add_argument('xwalk_file', type=str, help='XWalk output file')
parser.add_argument('-a', '--aminoacid', default='K', help='cross-linkable aminoacids. Defaults to Lysine (K).')
args = parser.parse_args()

# fd: sequence name -> 1-based start positions of every match of args.aminoacid
# ld: sequence name -> sequence length
fd = {}
ld = {}
with open(args.fasta_file, 'r') as fi:
    for name,seq in parse.parse_fasta(fi):
        # NOTE(review): args.aminoacid is handed to re.finditer as a regular
        # expression, so several residues need regex syntax such as "[KR]".
        fd[name] = [m.start() + 1 for m in re.finditer(args.aminoacid, seq)]
        ld[name] = len(seq)

# XWalk output: example line (whitespace-separated fields)
# 1 uvrdnap_ecmodel.pdb LYS-486-A-CB    LYS-496-B-CB    652 10.7    11.5    -   -   -

with open(args.xwalk_file, 'r') as fi:

    xd = {}

    for line in fi:
        temparray = line.split()
        # Fields 2 and 3 are the two residue descriptors (e.g. "LYS-486-A-CB"
        # in the example line above); split each into its '-' separated parts.
        temp1 = temparray[2].split('-')
        temp2 = temparray[3].split('-')
from parse import parse_fasta
from parse import parse_fastq
from teitlib import *
from time import sleep
from time import time
import re

# Timestamp taken when the script starts.
start_time = time()
# Load both FASTQ read files and concatenate the results.
# NOTE(review): the meaning of the second argument (438663) is not visible
# here -- check parse_fastq.
seqs1 = parse_fastq('SRR11397721_1.fastq', 438663)
seqs2 = parse_fastq('SRR11397721_2.fastq', 438663)
seqs = seqs1 + seqs2
# Delete duplicates; dict.fromkeys keeps the first occurrence of each entry
# and preserves the original order.
seqs = list(dict.fromkeys(seqs))

# The parse_fasta result is unpacked into exactly two values (presumably a
# header and a sequence -- verify against parse_fasta).
name, ref = parse_fasta("fasta/reference.fasta")

# aligns list of sequences to a reference


# returns all missing indices
def get_missing_indices(aligned_reference):
    """Return, in ascending order, every index whose element equals ``'x'``."""
    return [position
            for position, symbol in enumerate(aligned_reference)
            if symbol == 'x']


# returns start and end of missing indices
def get_missing_indices_brief(aligned_reference):
Beispiel #8
0
from parse import parse_fasta
from parse import parse_fastq

from teitlib import *

from time import time

# Timestamp taken when the script starts.
start = time()
# Each parse_fasta call is unpacked into exactly two values (presumably a
# header and a sequence -- verify against parse_fasta).
refname,ref = parse_fasta('reference.fasta')
aliname,ali = parse_fasta('align_seqs_1og2.fasta')
# Alternative inputs for aliname/ali, currently disabled:
#aliname,ali = parse_fasta('align_seqs1.fasta')
#aliname,ali = parse_fasta('align12_reverse.fasta')
#aliname,ali = parse_fasta('aligned12_wduplicates.fasta')
seqs = parse_fastq('SRR11397721_1.fastq',438663)
   
# `nomatch` holds one entry per line of the text file.
with open('nomatches_seqs1og2.txt') as f:
    nomatch = f.read().splitlines()
   
# Keep only the reads that occur somewhere in the reference
# (find() returns -1 when there is no occurrence).
matches = [i for i in seqs if ref.find(i) > -1]
printList(matches)
   
# Indices of `ali` whose element is 'x'.
mi = [i for i in range(len(ali)) if ali[i] == 'x'] 


def get_start_and_end_ofxinterval(li):
    ret = []
    ret.append(li[0])
    for i in range(len(li)-1):
        if li[i] != li[i+1]-1:
            ret.append(li[i])
            ret.append(li[i+1])