コード例 #1
0
def run_locarnap(seqsin, numkept, cpus=1, foldless=False):
    """Run locarna-p on the most abundant unique sequences of a group.

    Duplicate sequences are first collapsed with remove_duplicates. The
    remaining sequences are ranked by the abundance count at the end of
    their header (format Header_#) and at most ``numkept`` of them are
    handed to create_locarnap_alignment.

    Args:
        seqsin: sequences in MinimalFastaParser format,
            [(header, seq), (header, seq), ...].
        numkept: maximum number of sequences to align. Unless ``foldless``
            is true it is also the minimum number of unique sequences
            required to continue.
        cpus: value passed to locarna-p as the ``--cpus`` parameter.
        foldless: when true, the minimum-size check is skipped.

    Returns:
        ``("", "")`` when there are fewer than ``numkept`` unique sequences
        and ``foldless`` is false; otherwise the result of
        create_locarnap_alignment called with ``struct=True`` (alignment
        and structure).
    """
    # only the de-duplicated sequences are needed here; the second return
    # value (the header information) is discarded immediately
    seqs, _ = remove_duplicates(seqsin)
    # make sure group has enough sequences before continuing
    if len(seqs) < numkept and not foldless:
        return "", ""
    # headers come out in format Header_#; take the text after the LAST
    # underscore so headers that contain underscores themselves still yield
    # the abundance count
    ranked = sorted(
        seqs, reverse=True, key=lambda entry: int(entry[0].rsplit("_", 1)[1])
    )
    # cut to numkept most abundant sequences
    kept = ranked[:numkept]
    return create_locarnap_alignment(kept, RNA, struct=True, params={"--cpus": cpus})
コード例 #2
0
def run_locarnap(seqsin, numkept, cpus=1, foldless=False):
    '''Runs locarna-p on the most abundant sequences of a group.

    seqsin:   sequences in MinimalFastaParser format
              [(header, seq), (header, seq)]. Headers are expected in the
              format Header_#, where # is the abundance count. The caller's
              list is left untouched.
    numkept:  maximum number of sequences to align; unless foldless is True
              it is also the minimum group size required to continue.
    cpus:     value passed to locarna-p as the --cpus parameter.
    foldless: when True, groups smaller than numkept are still processed.

    Returns (alignment, structure):
      - ("", "") when the group has fewer than numkept sequences and
        foldless is False.
      - for a single sequence, no alignment is run: the sequence is loaded
        with LoadSeqs and the structure is element [1] of the
        get_secondary_structure result for it.
      - otherwise the create_locarnap_alignment result, with every '-' in
        the structure string replaced by '.'.
    '''
    #make sure group has enough sequences before continuing
    if len(seqsin) < numkept and not foldless:
        return "", ""

    if len(seqsin) == 1:
        #a lone sequence cannot be aligned, so it is folded on its own
        return LoadSeqs(data=seqsin, moltype=RNA), get_secondary_structure(seqsin[0][1])[1]
    #headers come out in format Header_#; take the text after the LAST
    #underscore so headers containing underscores still give the count.
    #sorted() works on a copy, so the caller's list keeps its order
    ranked = sorted(seqsin, reverse=True,
                    key=lambda entry: int(entry[0].rsplit('_', 1)[1]))
    #cut to numkept most abundant sequences
    kept = ranked[:numkept]

    aln, struct = create_locarnap_alignment(kept, RNA, struct=True, params={'--cpus': cpus})
    #swap '-' for '.' in the structure string
    struct = struct.replace('-', ".")
    return aln, struct
コード例 #3
0
 otu = currotu[0]
 print "==" + otu + "=="
 print "Reading in 30 most abundant sequences"
 # assuming that the fasta has more than 30 sequences in it. Safe assumption
 # if this is a significant cluster
 seqs = [(header, seq) for header, seq in MinimalFastaParser(open(currotu[1], "rU"))]
 seqs, headers = remove_duplicates(seqs)
 # blank headers to save memory
 headers = 0
 # headers come out in format Header_# so split to get # and sort by abundance
 seqs.sort(reverse=True, key=lambda count: int(count[0].split("_")[1]))
 # cut to 30 most abundant sequences
 seqs = seqs[:30]
 print "Running locarna-p on sequences"
 args = {"--cpus": "24"}
 aln, struct = create_locarnap_alignment(seqs, RNA, struct=True, params=args)
 # create output folder for OTU
 otufolder = "/Users/Ely/Desktop/Ely_selection/R7/lead_clusters/"
 if not exists(otufolder):
     mkdir(otufolder)
 otufolder += otu
 if not exists(otufolder):
     mkdir(otufolder)
 # print out alignment and structure in fasta and stockholm formats
 alnout = open(otufolder + "/locarnap-aln.fasta", "w")
 alnout.write(aln.toFasta() + "\n>SS_struct\n" + struct + "\n")
 alnout.close()
 alnout = open(otufolder + "/locarnap-aln.sto", "w")
 struct_dict = {"SS_cons": struct}
 alnout.write(stockholm_from_alignment(aln, GC_annotation=struct_dict))
 alnout.close()