コード例 #1
0
ファイル: rna_loci_assoc.py プロジェクト: sergpolly/FluUtils
###                                                  ###
########################################################
# ['bound', 'dgnp', 'long_loops', 'loops_all', 'paired', 'structured', 'threshold_freq']
# # parameter to control RNA-NP non-specific attraction ... 
# dg_NP = float(sys.argv[1])
# threshold_freq = 1.0
# table accumulating one enrichment record per (dg_NP, loci group) pair ...
enrichment_dat = pd.DataFrame({})
#############
# NP melting energies to scan through ...
dg_list = [-0.18,-0.2,-0.25,-0.3]
dg_list_num = len(dg_list)
###################
for dg_NP in dg_list:
    # single-argument print(...) emits the same text under Python 2 and Python 3 ...
    print("NP melting energy: %s" % (dg_NP,))
    ##################################
    # setup flu genome here (rebuilt for every dg_NP value) ...
    h1n1 = flu.influenza(genome_fname,orf_fname)
    # calculate its RNA structure for the current dg_NP ...
    h1n1.calculate_rna_lMFE_optimized(dg_NP)
    ##################################
    for loci_group_name in loci_group_names:
        # print name (two spaces after the colon, exactly as the old print statement produced) ...
        print("loci of interest:  %s" % (loci_group_name,))
        loci = loci_groups.get_group(loci_group_name)
        # keep only the 'seg' and 'pos' columns and group them by segment ...
        loci_seg = loci[['seg','pos']].groupby('seg')
        # NOTE(review): threshold_freq is not assigned in the visible code --
        # confirm it is defined before this loop runs.
        enrichment_tmp = get_loci_structure_pattern(h1n1, loci_seg, "%.3f"%threshold_freq)
        # DataFrame.append returns a NEW frame and leaves the original untouched,
        # so the result has to be bound back, otherwise enrichment_dat stays empty ...
        enrichment_dat = enrichment_dat.append(enrichment_tmp,ignore_index=True)
####################################################
#  DEALING WITH THE ENRICHMENT DATA ...
####################################################
コード例 #2
0
# Kyte-Doolittle hydrophobicity scale, extended with an entry for '*' ...
from Bio.SeqUtils import ProtParamData
# NOTE: KD is an alias of ProtParamData.kd, not a copy, so the extra '*' entry
# added below is also visible through ProtParamData.kd itself.
KD = ProtParamData.kd
KD.update({'*': 25.0})
###################################################
#
# # columns = ['name','seg','pos','description']
# seg_num = 8
# name = 'test1'
# output file name ...
out_fname = 'test_loci.txt'
#
########################################################
# reference loading: sequence file (.fa) and ORF description (.orf) ...
ref_fname, orf_fname = (
    "./pH1N1_coding_dat/pH1N1.fa",
    "./pH1N1_coding_dat/pH1N1_noPB1F.orf",
)
ph1n1 = flu.influenza(ref_fname, orf_fname)
#########################################################

# typical fname for segment alignemnt ...
def aligned_seg_fname(number):
    """Return the alignment file name for segment *number* (e.g. 1 -> 'seg1.afa')."""
    return "seg%d.afa" % number

# load coding part alignments for all of the segments, keyed 'seg1', 'seg2', ...
segment_aln = {}
for seg_idx in range(1, ph1n1.segnum + 1):
    fname = aligned_seg_fname(seg_idx)
    # read the fasta alignment once, store it and keep a handle for the stats below
    current_aln = segment_aln['seg%d' % seg_idx] = AlignIO.read(fname, 'fasta')
    # brief alignment info: number of sequences and alignment length
    aln_size = len(current_aln)
    aln_length = current_aln.get_alignment_length()
    # print
    # print "segment %d: %d sequences of length %d are aligned (length%%3=%d)"%(seg_idx, aln_size, aln_length, aln_length%3)