Ejemplo n.º 1
0
def main():
    """
    Print the angle between the two elements adjacent to each interior loop.

    The single positional command line argument is the path of a PDB file
    (``~`` is expanded).  For every interior loop reported by the
    coarse-grain structure, the two connected elements are looked up and the
    angle between their coordinate vectors (second coordinate minus first
    coordinate of each element) is printed through ``fud.pv``.

    Exits with status 1 after printing the help text when the PDB file
    argument is missing.
    """
    usage = """
    python interior_loop_angles.py pdb_file

    Iterate over the interior loop angles and calculate how much of a kink
    they introduce between the two adjacent stems.
    """
    # The PDB file is mandatory because args[0] is read unconditionally below;
    # with the previous value of 0 a missing argument was never reported.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(op.expanduser(args[0]))
    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        # Direction vector of each neighbouring element: end minus start.
        first_vec = cg.coords[conn[0]][1] - cg.coords[conn[0]][0]
        second_vec = cg.coords[conn[1]][1] - cg.coords[conn[1]][0]
        angle = ftuv.vec_angle(first_vec, second_vec)

        # fud.pv evaluates the expression string using the names
        # 'iloop' and 'angle', so these locals must keep their names.
        fud.pv('iloop, angle')
Ejemplo n.º 2
0
def calculate_variation(angle_stats, loop_size):
    '''
    Report which angle statistics are available for a loop of a given size.

    The intended measure of variation (the volume of the n-dimensional
    enclosure spanned by the minimum and maximum coordinates of the
    statistics) is not computed here.  The function only looks up the key
    ``loop_size + (ang_type,)`` for each of the four angle types and, when
    the key is present, prints the key and the number of statistics stored
    under it via fud.pv.

    :param angle_stats: Container of angle statistics supporting ``in`` and
                        indexing by a tuple key.
    :param loop_size: The dimensions of the loop (i.e. (1,3))
    :return: None
    '''
    # ang_type indicates whether it's an iloop forward/backward
    # or a multiloop forward/backward
    for ang_type in (1, 2, 3, 4):
        # 'ang_dims' and 'angle_stats' are referenced by name in the
        # fud.pv expression strings below.
        ang_dims = tuple(loop_size) + (ang_type,)
        if ang_dims not in angle_stats:
            continue

        fud.pv('ang_dims')
        fud.pv('len(angle_stats[ang_dims])')
Ejemplo n.º 3
0
def calculate_variation(angle_stats, loop_size):
    '''
    Print the available angle statistics for one loop size.

    Conceptually the variation of a loop should be estimated from the
    volume of the n-dimensional box bounded by the minimum and maximum
    coordinates of its statistics; this implementation stops short of that
    and only reports, for each angle type 1 to 4, the lookup key and how
    many statistics are stored under it (through fud.pv).  Keys that are
    absent from angle_stats are skipped silently.

    :param angle_stats: Mapping-like object keyed by
                        ``tuple(loop_size) + (ang_type,)``.
    :param loop_size: The dimensions of the loop (i.e. (1, 3))
    :return: None
    '''
    # ang_type indicates whether it's an iloop forward/backward
    # or a multiloop forward/backward
    for ang_type in (1, 2, 3, 4):
        # The fud.pv expression strings refer to 'ang_dims' and
        # 'angle_stats' by name, so those names must not change.
        ang_dims = tuple(loop_size) + (ang_type,)
        if ang_dims not in angle_stats:
            continue

        fud.pv('ang_dims')
        fud.pv('len(angle_stats[ang_dims])')
Ejemplo n.º 4
0
def main():
    """
    Print the angle between the two elements adjacent to each interior loop.

    Expects one positional command line argument: the path of a PDB file
    (``~`` is expanded).  For each interior loop of the coarse-grain
    structure the two connected elements are retrieved and the angle between
    their coordinate vectors (second minus first coordinate) is printed with
    ``fud.pv``.

    Prints the help text and exits with status 1 when no PDB file is given.
    """
    usage = """
    python interior_loop_angles.py pdb_file

    Iterate over the interior loop angles and calculate how much of a kink
    they introduce between the two adjacent stems.
    """
    # args[0] is always read below, so exactly one argument is the minimum;
    # the former value of 0 let a missing argument slip past this check.
    num_args = 1
    parser = OptionParser(usage=usage)

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    cg = ftmc.from_pdb(op.expanduser(args[0]))
    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        # Direction vector of each neighbouring element: end minus start.
        first_vec = cg.coords[conn[0]][1] - cg.coords[conn[0]][0]
        second_vec = cg.coords[conn[1]][1] - cg.coords[conn[1]][0]
        angle = ftuv.vec_angle(first_vec, second_vec)

        # 'iloop' and 'angle' are looked up by name by fud.pv.
        fud.pv('iloop, angle')
Ejemplo n.º 5
0
    def test_from_pdb(self):
        # Smoke test: convert the 1MZP test structure to the forna json
        # representation and make sure the result can be serialized.
        with open('test/data/1MZP.pdb', 'r') as pdb_file:
            # The conversion runs while the file is still open.
            converted = forna.pdb_to_json(pdb_file.read(), '2ZM5')
            # 's' is referenced by name in the fud.pv expression string.
            s = json.dumps(converted)
            fud.pv('s')

        '''
Ejemplo n.º 6
0
def main(args):
    """
    Run either a one-off energy evaluation or a sampling run.

    The deterministic set-up is done first (and verified not to touch the
    state of the ``random`` module).  In eval-energy mode the energies are
    printed and the process exits with status 0.  Otherwise both ``random``
    and ``numpy.random`` are seeded, a sampler is chosen from the options
    and stepped ``args.iterations`` times, with header lines written to the
    output file.

    :param args: Parsed command line options.  The attributes read here are
                 eval_energy, seed, exhaustive, new_ml, start_from_scratch,
                 dump_energies and iterations; args is also passed on to
                 setup_deterministic and setup_stat.
    """
    #Setup that does not use the random number generator.
    randstate=random.getstate()#Just for verification purposes
    sm, original_sm, ofilename, energy, energies_to_track = setup_deterministic(args)
    # The set-up must not have consumed any random numbers, otherwise a given
    # seed would no longer reproduce the same run.
    assert randstate==random.getstate()#Just for verification purposes
    fud.pv("energies_to_track")
    #Eval-energy mode
    if args.eval_energy:
        sm.bg.add_all_virtual_residues()
        # fud.pv receives the expression as a string; the energies are
        # evaluated and printed by it.
        fud.pv('energy.eval_energy(sm, verbose=True, background=False)')
        if sm.constraint_energy:
            fud.pv('sm.constraint_energy.eval_energy(sm, verbose=True, background=False)')
        if sm.junction_constraint_energy:
            fud.pv('sm.junction_constraint_energy.eval_energy(sm, verbose=True, background=False)')
        for track_energy in energies_to_track:
            fud.pv('track_energy.eval_energy(sm, verbose=True, background=False)')
        # Evaluation only: no sampling is performed in this mode.
        sys.exit(0) 
  
    #Set-up the random Number generator.
    #Until here, no call to random should be made.
    # NOTE(review): the truthiness test means a seed of 0 is treated like
    # "no seed given" and replaced by a random one -- confirm this is intended.
    if args.seed:
        seed_num=args.seed
    else:
        seed_num = random.randint(0,4294967295) #sys.maxint) #4294967295 is maximal value for numpy
    # Both generators get the same seed.
    random.seed(seed_num)
    np.random.seed(seed_num)
    #Main function, dependent on random.seed        
    with open_for_out(ofilename) as out_file:
        # The sampled energy (or its uncalibrated parts) is tracked in
        # addition to the energies returned by the set-up.
        if isinstance(energy, fbe.CombinedEnergy):
            energies_to_track+=energy.uncalibrated_energies
        elif isinstance(energy, fbe.CoarseGrainEnergy):
            energies_to_track+=[energy]
        stat=setup_stat(out_file, sm, args, energies_to_track, original_sm)
        try:
            # Header lines recording the seed and the command line.
            print ("# Random Seed: {}".format(seed_num), file=out_file)
            print ("# Command: `{}`".format(" ".join(sys.argv)), file=out_file)
            for e in energy.iterate_energies():
                if isinstance(e, fbe.FPPEnergy):
                    # Landmarks are written as comma-joined items separated
                    # by colons.
                    print("# Used FPP energy with options: --scale {} --ref-img {} "
                          "--fpp-landmarks {}".format(e.scale, e.ref_image, 
                                                      ":".join(",".join(map(str,x)) for x in e.landmarks)),
                          file=out_file)
            # Sampler selection: exhaustive exploration takes precedence over
            # the improved multiloop MCMC, which takes precedence over the
            # plain MCMC sampler.
            if args.exhaustive:
                sampler = fbs.ExhaustiveExplorer(sm, energy, stat, args.exhaustive, args.start_from_scratch)
            elif args.new_ml:
                sampler = fbs.ImprovedMultiloopMCMC(sm, energy, stat, 
                                          start_from_scratch=args.start_from_scratch,
                                          dump_measures=args.dump_energies)
            else:
                sampler = fbs.MCMCSampler(sm, energy, stat, 
                                          start_from_scratch=args.start_from_scratch,
                                          dump_measures=args.dump_energies)
            for i in range(args.iterations):
                sampler.step()
        finally: #Clean-up 
            # Always report the seed on stderr, even when sampling fails.
            print("INFO: Random seed was {}".format(seed_num), file=sys.stderr)
Ejemplo n.º 7
0
def json_to_json(rna_json_str):
    '''
    Round-trip an RNA json string through fasta and BulgeGraph form.

    The json string is split into one fasta text per molecule, each fasta
    text is turned into a bulge graph and converted back to json, and the
    per-molecule results are merged into one json dictionary.  Links
    between different molecules are re-created at the end.

    The purpose is to maintain the integrity of the molecule and to
    maintain the positions of all the hidden nodes after modification.

    As a side effect the raw input is written to the file 'test.out'.

    :param rna_json_str: The json string describing the RNA molecule(s).
    :return: A dictionary with the keys 'nodes' and 'links'.
    '''
    with open('test.out', 'w') as dump_file:
        dump_file.write(rna_json_str)

    fasta_parts = json_to_fasta(rna_json_str)
    (all_fastas, all_xs, all_ys, all_uids,
     different_tree_links) = fasta_parts

    merged_nodes = []
    merged_links = []
    big_json = {'nodes': merged_nodes, 'links': merged_links}

    # (x, y) of a nucleotide node -> index of that node in merged_nodes
    coords_to_index = {}

    for fasta_text, xs, ys, uids in zip(all_fastas, all_xs, all_ys, all_uids):
        bg = fgb.BulgeGraph()
        bg.from_fasta(fasta_text)
        new_json = bg_to_json(bg, xs=xs, ys=ys, uids=uids)

        # Nodes of this molecule are appended after the ones already merged,
        # so every index local to new_json is shifted by this amount.
        offset = len(merged_nodes)

        for link in new_json['links']:
            link['source'] += offset
            link['target'] += offset
            merged_links.append(link)

        # Links between different molecules are stored by the coordinates of
        # the nodes they join, hence the coordinate lookup table.
        for i, n in enumerate(new_json['nodes']):
            if n['node_type'] != 'nucleotide':
                continue
            coords_to_index[(n['x'], n['y'])] = i + offset

        merged_nodes.extend(new_json['nodes'])

    # add the links that are between different molecules
    # ('dtl', 'n1' and 'n2' are referenced by name in the fud.pv strings)
    for dtl in different_tree_links:
        fud.pv('dtl')
        n1 = coords_to_index[dtl[0]]
        n2 = coords_to_index[dtl[1]]

        fud.pv('n1,n2')
        merged_links.append({
            'source': n1,
            'target': n2,
            'link_type': 'basepair',
            'value': 1
        })

    return big_json
Ejemplo n.º 8
0
    def test_angle_stat_difference(self):
        # 'as1' and 'as2' are referenced by name in the fud.pv expression
        # string at the end, so they keep their names.
        as1 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=1.57, v1=0)

        # Two identical statistics must not differ at all.
        as2 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=1.57, v1=0)
        no_difference = np.allclose([as1.diff(as2)], [0])
        self.assertTrue(no_difference)

        # Changing only u from 1.57 to 0 should give a difference of about
        # sqrt(2), checked with a relative tolerance of 1%.
        as2 = ftms.AngleStat(u=0, v=0., r1=1, u1=1.57, v1=0)
        expected = [math.sqrt(2)]
        self.assertTrue(np.allclose([as1.diff(as2)], expected, rtol=0.01))

        # Changing only u1 is merely printed, not asserted.
        as2 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=0, v1=0)
        fud.pv('as1.diff(as2)')
Ejemplo n.º 9
0
    def test_angle_stat_difference(self):
        # The reference statistic; the name 'as1' (like 'as2') is used by
        # the fud.pv expression string below and must stay unchanged.
        as1 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=1.57, v1=0)

        # Case 1: identical parameters, expected difference 0.
        as2 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=1.57, v1=0)
        same = np.allclose([as1.diff(as2)], [0])
        self.assertTrue(same)

        # Case 2: only u differs (0 instead of 1.57); the difference is
        # expected to be sqrt(2) within a relative tolerance of 0.01.
        as2 = ftms.AngleStat(u=0, v=0., r1=1, u1=1.57, v1=0)
        target = [math.sqrt(2)]
        self.assertTrue(np.allclose([as1.diff(as2)], target, rtol=0.01))

        # Case 3: only u1 differs; the value is printed for inspection only.
        as2 = ftms.AngleStat(u=1.57, v=0., r1=1, u1=0, v1=0)
        fud.pv('as1.diff(as2)')
Ejemplo n.º 10
0
    def test_sample_stats(self):
        # Sequence and dot-bracket structure of the test molecule.
        fa_text = """>1
        AGAGGUUCUAGCUACACCCUCUAUAAAAAACUAAGG
        (((((............)))))..............
        """

        cg = ftmc.CoarseGrainRNA()
        cg.from_fasta(fa_text)

        # Sample statistics for the element named 't1'.
        stats = ftms.get_conformation_stats().sample_stats(cg, 't1')

        # Nothing is asserted; the structure and the sampled statistics are
        # only printed.  The expressions refer to 'cg' and 'stats' by name.
        for expression in ('cg.to_cg_string()', 'stats'):
            fud.pv(expression)
Ejemplo n.º 11
0
    def test_sample_stats(self):
        # Fasta-style description (id, sequence, dot-bracket) of the input.
        fa_text = """>1
        AGAGGUUCUAGCUACACCCUCUAUAAAAAACUAAGG
        (((((............)))))..............
        """

        cg = ftmc.CoarseGrainRNA()
        cg.from_fasta(fa_text)

        # Draw statistics for element 't1' from the conformation statistics.
        stats = ftms.get_conformation_stats().sample_stats(cg, 't1')

        # Print-only check; 'cg' and 'stats' are looked up by name by fud.pv.
        for expression in ('cg.to_cg_string()', 'stats'):
            fud.pv(expression)
Ejemplo n.º 12
0
def main():
    """
    Write the PDB fragments that belong to coarse-grain elements.

    With exactly one positional argument, that file is loaded as a
    coarse-grain structure and one fragment file is written per element of
    it, after which the program exits with status 0.

    Otherwise a fragment is written for every entry of the angle, stem and
    loop statistics.  The source structure is re-parsed only when the PDB
    name differs from that of the previous entry.

    Fragments go to the directory given by -o/--output-dir, which is created
    when it does not exist.
    """
    usage = './get_stem_fragments.py [temp.comp]'
    # The usage text was previously built but never handed to the parser.
    parser = OptionParser(usage=usage)

    parser.add_option('-o', '--output-dir', dest='output_dir', default=fbc.Configuration.stem_fragment_dir,
                      help='The directory in which to output all of the fragments', type='str')

    (options, args) = parser.parse_args()

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    if len(args) == 1:
        bg = ttmc.CoarseGrainRNA(args[0])

        for st in bg.elements():
            filename = '%s_%s.pdb' % (bg.name, "_".join(map(str, bg.defines[st])))
            out_file = os.path.join(options.output_dir, filename)
            s = PDBParser().get_structure('t', os.path.join(fbc.Configuration.data_base_dir, "%s/temp.pdb" % (bg.name)))
            output_stem_fragment(bg.defines[st], s, out_file)
        sys.exit(0)

    # None (rather than '') can never equal a real pdb name, so the structure
    # is always loaded for the first entry and 's' is never unbound.
    prev_pdb_name = None
    s = None

    all_stats = it.chain(cbs.get_angle_stats().values(),
                         cbs.get_stem_stats().values(),
                         cbs.get_loop_stats().values())
    for stat_list in all_stats:
        for ss in stat_list:
            filename = '%s_%s.pdb' % (ss.pdb_name, "_".join(map(str, ss.define)))
            out_file = os.path.join(options.output_dir, filename)

            if ss.pdb_name != prev_pdb_name:
                # 'ss' and 'fbc' are referenced by name in the expression
                # string handed to cud.pv.
                cud.pv('ss.define, fbc.Configuration.data_base_dir, ss.pdb_name')
                s = PDBParser().get_structure('t', os.path.join(fbc.Configuration.data_base_dir, "%s/temp.pdb" % (ss.pdb_name)))
                prev_pdb_name = ss.pdb_name

            # Same output as the Python 2 statement `print out_file, ss.define`,
            # but valid under both Python 2 and Python 3.
            print("%s %s" % (out_file, ss.define))
            output_stem_fragment(ss.define, s, out_file)
Ejemplo n.º 13
0
def main():
    """
    Print, and optionally store, the angle at each interior loop.

    Expects one positional command line argument: the path of a PDB file
    (``~`` is expanded).  For each interior loop of the coarse-grain
    structure, the angle between the coordinate vectors (second minus first
    coordinate) of the two connected elements is computed and printed with
    ``fud.pv``.

    With -o/--output the (iloop, angle) rows are also written to that file
    as tab-separated values with a header row; no file is written when the
    structure has no interior loops.

    Prints the help text and exits with status 1 when no PDB file is given.
    """
    usage = """
    python interior_loop_angles.py pdb_file

    Iterate over the interior loop angles and calculate how much of a kink
    they introduce between the two adjacent stems.
    """
    # args[0] is always read below, so one argument is required; the former
    # value of 0 let a missing argument slip past the check.
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option("-o",
                      "--output",
                      action="store",
                      help="Store data in csv with this filename")

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    data = []

    # from_pdb returns a sequence here; exactly one structure is expected.
    cg, = ftmc.CoarseGrainRNA.from_pdb(op.expanduser(args[0]))
    for iloop in cg.iloop_iterator():
        conn = cg.connections(iloop)
        first_vec = cg.coords[conn[0]][1] - cg.coords[conn[0]][0]
        second_vec = cg.coords[conn[1]][1] - cg.coords[conn[1]][0]
        angle = ftuv.vec_angle(first_vec, second_vec)
        data.append([iloop, angle])

        # 'iloop' and 'angle' are looked up by name by fud.pv.
        fud.pv('iloop, angle')

    if options.output and data:
        with open(options.output, 'w') as out_file:
            writer = csv.writer(out_file, delimiter="\t", lineterminator="\n")
            writer.writerow(["iloop", "angle"])
            writer.writerows(data)
def main():
    """
    Cluster loop sizes by their (u, v) angle statistics and plot one pair.

    Steps performed:

    1. Load the angle statistics from 'fess/stats/real.stats' (and
       'fess/stats/temp.stats', which is loaded but not used further).
    2. For every statistic key whose third component is 2, pool the (u, v)
       values of the nearest dimension sizes (as selected by
       get_nearest_dimension_sizes) into one point set.  The -i and -m
       options restrict which keys are considered.
    3. Compute, for every pair of point sets, the largest nearest-neighbour
       distance taken over both directions, cluster these distances with
       complete linkage and show the dendrogram.
    4. For the hard-coded keys (6,7,2) and (5,6,2), print the distances and
       a histogram-based divergence value, and plot both point sets on a
       unit sphere.  The figure is saved when -f is given and shown
       otherwise.

    NOTE(review): step 4 assumes that both hard-coded keys survived the
    filtering in step 2 and were paired in this order -- otherwise the
    lookups in named_dists / pp_dists raise KeyError.
    """
    usage = """
    ./helix_orienation_divergences.py

    Analyze how much the helix-helix orientations diverge between two data sets.
    """
    num_args = 0
    # The usage text was previously built but never handed to the parser.
    parser = OptionParser(usage=usage)

    parser.add_option('-r', '--resolution', dest='resolution', default=10, help="The resolution of the resulting plot", type='int')
    parser.add_option('-a', '--angle', dest='angle', default=0, help="The angle of the camera", type='float')
    parser.add_option('-f', '--fig-name', dest='fig_name', default='', help="The name of the file to save the figure to. If it is not specified, the figure will not be saved", type='str')
    parser.add_option('-i', '--interior_loops', dest='interior_loops', default=False, help='Cluster only the interior loops', action='store_true')
    # The help text used to be a copy of the one for --interior_loops.
    parser.add_option('-m', '--multi_loops', dest='multi_loops', default=False, help='Cluster only the multi loops', action='store_true')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    real_stats = ftms.ConformationStats('fess/stats/real.stats').angle_stats
    # NOTE(review): never used below (both sets plotted at the end come from
    # real_stats); the load is kept so that a missing file still fails early.
    sampled_stats = ftms.ConformationStats('fess/stats/temp.stats').angle_stats

    # count how many statistics we have for each statistic type
    stat_counts = c.defaultdict(int)
    for sc in real_stats.keys():
        stat_counts[sc] += len(real_stats[sc])

    histograms = dict()
    # Iterate over a snapshot of the keys: stat_counts is a defaultdict that
    # is handed to a helper, and a lookup of a missing key would insert it.
    for b in list(stat_counts.keys()):
        if b[2] != 2.:
            # only look at type 2 angles
            continue

        # presumably 1000 is the placeholder dimension of multiloop
        # segments -- TODO confirm
        has_1000_dim = b[0] == 1000 or b[1] == 1000
        if options.interior_loops and has_1000_dim:
            continue
        if options.multi_loops and not has_1000_dim:
            continue

        (selected_sizes, count) = get_nearest_dimension_sizes(b, stat_counts, 1)

        if count < 3:
            continue

        # 'b' and 'selected_sizes' are looked up by name by fud.pv.
        fud.pv('b, selected_sizes')

        combined_real = []

        # get the statistics that correspond to the selected sampled sizes
        for ss in selected_sizes:
            ss_r = real_stats[ss]

            # .values replaces DataFrame.as_matrix(), which newer pandas
            # versions no longer provide.
            combined_real += list(ss_r[['u', 'v']].values)

        histograms[b] = np.array(combined_real)

    dists = []
    named_dists = dict()
    pp_dists = dict()
    for k1, k2 in it.combinations(histograms.keys(), 2):
        # For every point of one set the distance to the closest point of the
        # other set, in both directions.
        per_point_distances = [
            min(ftuv.magnitude(p1 - p2) for p2 in histograms[k2])
            for p1 in histograms[k1]]
        per_point_distances += [
            min(ftuv.magnitude(p1 - p2) for p1 in histograms[k1])
            for p2 in histograms[k2]]

        largest = max(per_point_distances)
        dists += [largest]
        named_dists[(k1, k2)] = largest
        pp_dists[(k1, k2)] = per_point_distances

    fud.pv('dists')
    # dists is in itertools.combinations order over the histogram keys, which
    # is the condensed form expected by the linkage function.
    Z = sch.complete(dists)
    fud.pv('Z')
    sch.dendrogram(Z, labels=list(histograms.keys()), leaf_rotation=90)
    plt.subplots_adjust(bottom=0.25)

    plt.show()

    k1 = (6, 7, 2)
    k2 = (5, 6, 2)

    rs = get_certain_angle_stats(real_stats, k1)
    ss = get_certain_angle_stats(real_stats, k2)

    fud.pv('named_dists[(k1,k2)]')
    fud.pv('pp_dists[(k1,k2)]')

    real_us = rs[['u', 'v']].values
    sampled_us = ss[['u', 'v']].values

    U_r = real_us[:, 0]
    V_r = real_us[:, 1]

    U_s = sampled_us[:, 0]
    V_s = sampled_us[:, 1]

    total_r = len(U_r)
    total_s = len(U_s)

    hr = np.histogram2d(U_r, V_r)
    hs = np.histogram2d(U_s, V_s)

    # Counts with a pseudo-count of one, each normalised by the size of its
    # OWN sample (the second set used to be divided by total_r as well).
    pseudo_r = (hr[0] + 1) / total_r
    pseudo_s = (hs[0] + 1) / total_s
    kl = pseudo_r * (pseudo_r / pseudo_s)
    fud.pv('kl')
    fud.pv('sum(map(sum, kl))')

    # Convert the two angles to cartesian points on the unit sphere.
    X_r = np.sin(U_r) * np.cos(V_r)
    Y_r = np.sin(U_r) * np.sin(V_r)
    Z_r = np.cos(U_r)

    r = 1.
    X_s = r * np.sin(U_s) * np.cos(V_s)
    Y_s = r * np.sin(U_s) * np.sin(V_s)
    Z_s = r * np.cos(U_s)

    fud.pv('real_us')

    # Same output as the Python 2 statement
    # `print len(real_us), len(sampled_us)`, valid on Python 2 and 3.
    print("%s %s" % (len(real_us), len(sampled_us)))

    fig = plt.figure(figsize=(10, 10))
    ax = Axes3D(fig)

    a = Arrow3D([-1.3, 1.3], [0, 0], [0, 0], mutation_scale=20, lw=5, arrowstyle="-|>", color="g")
    ax.add_artist(a)

    ax.plot(X_r, Y_r, Z_r, 'bo', alpha=0.3)
    ax.plot(X_s, Y_s, Z_s, 'ro', alpha=0.3)

    # Wireframe of the unit sphere as a visual reference.
    u, v = np.mgrid[0:2*np.pi:20j, 0:np.pi:10j]
    x = np.cos(u) * np.sin(v)
    y = np.sin(u) * np.sin(v)
    z = np.cos(v)
    ax.plot_wireframe(x, y, z, color="y")

    ax._axis3don = False
    ax.set_zlim3d(-1, 1)
    ax.w_zaxis.set_major_locator(LinearLocator(6))
    ax.view_init(0, options.angle)

    if options.fig_name != "":
        plt.savefig(options.fig_name, bbox_inches='tight')
    else:
        plt.show()
Ejemplo n.º 15
0
def main():
    """
    Display coarse-grain structures in pymol.

    Every positional argument is loaded as a coarse-grain structure (at
    least one is required; otherwise the help text is printed and the
    program exits with status 1).  The structures are turned into a pymol
    script by a PymolPrinter configured from the command line options, the
    script is written to a temporary file and pymol is started on it.

    With -o/--output pymol renders the scene to that image file and quits;
    with --batch pymol is started with '-cq'.  Whatever pymol wrote to its
    standard error stream is echoed on stderr at the end.

    Highlighting (-g) and distance lines (-d) refer to the last structure
    given on the command line.
    """
    usage = """
    ./visualize_cg.py cg_file

    Display the coarse-grain representation of a structure in pymol.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-g', '--highlight', dest='highlight', default=None,
                      help="Highlight some elements", type='str')
    parser.add_option('-o', '--output', dest='output', default=None,
                      help="Create a picture of the scene and exit",
                      type='str')
    parser.add_option('-r', '--longrange', dest='longrange', default=False,
                      action='store_true',
                      help="Display long-range interactions")
    parser.add_option('-l', '--loops', dest='loops', default=True,
                      action='store_false',
                      help="Don't display the coarse-grain hairpin loops")
    parser.add_option('-c', '--cones', dest='cones', default=False,
                      action='store_true',
                      help="Display cones that portrude from the stems")
    parser.add_option('-x', '--text', dest='text', default=False,
                      action='store_true',
                      help="Add labels to the figure.")
    parser.add_option('-a', '--align', dest='align', default=False,
                      action='store_true',
                      help='Align all of the structures with the first')
    parser.add_option('-e', '--encompassing-stems',
                      dest='encompassing_stems', default=False,
                      action='store_true',
                      help='Show the big stems that encompass the colinear ones.')
    parser.add_option('-v', '--virtual-atoms', dest='virtual_atoms',
                      default=False, action='store_true',
                      help='Display the virtual atoms')
    parser.add_option('-d', '--distance', dest='distance', default=None,
                      help="Draw the lines between specified virtual residues")
    parser.add_option('-b', '--basis', dest='basis', default=False,
                      action='store_true',
                      help='Display the coordinate basis of each element')
    parser.add_option('--batch', dest='batch', default=False,
                      action='store_true',
                      help='Start pymol in batch mode')
    parser.add_option('--sidechain-atoms', dest='sidechain_atoms',
                      default=False, action='store_true',
                      help='Include the sidechain atoms. Automatically enables --virtual-atoms')
    # The help text used to contain a run of spaces from a backslash line
    # continuation inside the string literal.
    parser.add_option('--rainbow', dest='rainbow', default=False,
                      action='store_true',
                      help='Color each of the nucleotide positions (i.e. '
                           'average atoms) according to the colors of '
                           'the rainbow and their position')
    parser.add_option('--only-elements', dest='only_elements', default=None,
                      help='Display only these elements '
                           'element names should be '
                           'separated by commas')
    # A space was missing between "elements" and "gradually".
    parser.add_option('--color-gradual', dest='color_gradual', default=None,
                      help='Color the specified elements '
                           'gradually from one to the other, example (i1,i4,m1)',
                      type='str')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    pp = cvp.PymolPrinter()
    pp.add_loops = options.loops
    pp.draw_cones = options.cones
    pp.add_longrange = options.longrange
    pp.print_text = options.text
    pp.encompassing_stems = options.encompassing_stems
    pp.virtual_atoms = options.virtual_atoms
    pp.sidechain_atoms = options.sidechain_atoms
    pp.basis = options.basis
    pp.rainbow = options.rainbow

    if options.only_elements is not None:
        pp.only_elements = options.only_elements.split(',')

    cgs = [cmg.CoarseGrainRNA(a) for a in args]

    if options.align:
        align_cgs(cgs)

    if options.color_gradual is not None:
        pp.element_specific_colors = dict()
        import matplotlib.pyplot as plt
        cmap = plt.get_cmap('coolwarm')

        # Everything is black unless it is listed in --color-gradual.
        for d in cgs[0].defines:
            pp.element_specific_colors[d] = 'black'

        to_color_nodes = options.color_gradual.split(',')
        for i, node in enumerate(to_color_nodes):
            color = cmap(i / float(len(to_color_nodes)))
            print("%s %s" % (node, color))
            pp.element_specific_colors[node] = color

    for i, cg in enumerate(cgs):
        if i > 0:
            # Structures after the first are drawn with a modified colour.
            pp.color_modifier = .3

        pp.coordinates_to_pymol(cg)

    # From here on 'cg' is the LAST structure of the loop above; cgs cannot
    # be empty because at least one argument is required.

    # highlight things in purple
    if options.highlight is not None:
        for s in options.highlight.split(','):
            # 's' is looked up by name by fud.pv.
            fud.pv('s')
            pp.add_twists = False
            pp.add_stem_like(cg, s, color='purple', width=3.)

    # display the distances between nucleotides
    if options.distance is not None:
        virtual_atoms = ftug.virtual_atoms(cg, sidechain=False)

        # Format: 'from,to' pairs separated by colons.
        for dist_pair in options.distance.split(':'):
            fud.pv('dist_pair')
            fr, to = dist_pair.split(',')

            fr = int(fr)
            to = int(to)

            pp.add_dashed(virtual_atoms[fr]["C1'"],
                          virtual_atoms[to]["C1'"],
                          width=1.2)

    # f holds the generated pymol script, f1 the command file that runs it.
    # Text mode lets the str payloads be written on Python 2 and 3 alike.
    with tf.NamedTemporaryFile(mode='w') as f:
        with tf.NamedTemporaryFile(mode='w', suffix='.pml') as f1:
            f.write(pp.pymol_string())
            f.flush()

            pymol_cmd = 'hide all\n'
            pymol_cmd += 'run %s\n' % (f.name)
            pymol_cmd += 'show cartoon, all\n'
            pymol_cmd += 'bg white\n'
            pymol_cmd += 'clip slab, 10000\n'
            pymol_cmd += 'orient\n'

            if options.output is not None:
                pymol_cmd += 'ray\n'
                pymol_cmd += 'png %s\n' % (options.output)
                pymol_cmd += 'quit\n'

            f1.write(pymol_cmd)
            f1.flush()

            print("f1.name: %s" % (f1.name,))

            pymol_argv = ['pymol']
            if options.batch:
                pymol_argv.append('-cq')
            pymol_argv.append(f1.name)

            # Both temporary files must still exist while pymol runs, hence
            # the call inside the two 'with' blocks.
            p = sp.Popen(pymol_argv, stdout=sp.PIPE, stderr=sp.PIPE)

            out, err = p.communicate()
            sys.stderr.write("err: %s\n" % (err,))
Ejemplo n.º 16
0
def predict(bg, energies_to_sample, options):
    """
    Sample structures for a coarse-grain RNA with the given energies.

    The function
      * prepares the output directory and the output stream,
      * selects the conformation statistics (JAR3D-filtered, filtered from
        a file, fixed to the current loop geometry, or a plain stats file),
      * in eval-energy mode prints every energy and exits,
      * otherwise creates one sampler per energy (at most three, one per
        plot colour) and steps them ``options.iterations`` times.

    :param bg: The coarse-grain structure to sample.
    :param energies_to_sample: List of energy objects; replaced by a single
                               CheatingEnergy when options.cheating is set.
    :param options: Parsed command line options.  options.output_file is
                    replaced by an open file object (or sys.stdout) and
                    options.fix_loop may be overwritten as a side effect.
    """
    # The fud.pv calls in this function receive expression strings that
    # refer to local names (energies_to_sample, cmd, element_to_fix, energy,
    # sm, options, bg, energies_to_track, samplers); keep those names.
    fud.pv('energies_to_sample[0].energies')

    if options.cheating:
        sm = fbm.SpatialModel(bg)
        energies_to_sample = [fbe.CheatingEnergy(sm.bg)]

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)
    if options.output_file is None or options.output_file == sys.stdout:
        options.output_file = sys.stdout
    else:
        options.output_file = open(options.output_file, 'w')

    cbc.Configuration.sampling_output_dir = op.join(options.output_dir, bg.name)

    if options.output_dir_suffix is not None:
        cbc.Configuration.sampling_output_dir = op.join(cbc.Configuration.sampling_output_dir, options.output_dir_suffix)

    if not os.path.exists(cbc.Configuration.sampling_output_dir):
        os.makedirs(cbc.Configuration.sampling_output_dir)

    if options.fix_all_loops:
        # Elements whose name starts with 'i' are the interior loops.
        options.fix_loop = ','.join([d for d in bg.defines if d[0] == 'i'])

    if options.jared_dir is not None:
        # run the jar3d_annotate script to get a list of potential statistics for each interior loop
        jared_script = op.join(options.jared_dir, 'scripts/annotate_structure.py')
        jared_data = op.join(options.jared_dir, 'JAR3D')

        filtered_stats_fn = op.join(cbc.Configuration.sampling_output_dir,
                                    'filtered.stats')

        cmd = ['python', jared_script, options.bg_filename, '-o', jared_data,
               '-m', '-e', '-d', jared_data]

        fud.pv('cmd')
        p = spr.Popen(cmd, stdout=spr.PIPE)
        # Only stdout is piped, so the second element is always None.
        out, _ = p.communicate()

        with open(filtered_stats_fn, 'w') as filtered_out:
            filtered_out.write(out)

        filtered_stats = ftms.FilteredConformationStats(stats_file=options.stats_file,
                                                        filter_filename=filtered_stats_fn)
        ftms.set_conformation_stats(filtered_stats)
        sys.stderr.write("Using JAR3D filtered stats\n")
    elif options.filtered_stats_file is not None:
        filtered_stats = ftms.FilteredConformationStats(stats_file=options.stats_file,
                                                        filter_filename=options.filtered_stats_file)
        ftms.set_conformation_stats(filtered_stats)
    elif options.fix_loop is not None:
        filtered_stats = ftms.FilteredConformationStats(stats_file=options.stats_file)
        filtered_stats.filtered_stats = col.defaultdict(list)

        for element_to_fix in options.fix_loop.split(','):
            sys.stderr.write("fixing loop %s\n" % (element_to_fix,))
            if element_to_fix[0] not in ('i', 'm'):
                sys.stderr.write("Cannot fix non-interior loop or multi-loop stats, yet!\n")
                sys.exit(1)
            # Restrict the element to the two angle statistics of the
            # current structure.
            as1, as2 = bg.get_bulge_angle_stats(element_to_fix)

            filtered_stats.filtered_stats[(element_to_fix, as1.ang_type)] += [as1]
            filtered_stats.filtered_stats[(element_to_fix, as2.ang_type)] += [as2]

        ftms.set_conformation_stats(filtered_stats)
        fud.pv('element_to_fix')

    elif options.stats_file is not None:
        cbc.Configuration.stats_file = options.stats_file
        ftms.set_conformation_stats(ftms.ConformationStats(options.stats_file))

    sm = fbm.SpatialModel(bg)

    if options.log_to_file:
        # Close a file opened above before replacing it, so that the handle
        # does not leak.
        if options.output_file is not sys.stdout:
            options.output_file.close()
        options.output_file = open(op.join(cbc.Configuration.sampling_output_dir, 'log.txt'), 'w')

    if options.eval_energy:
        for s in sm.bg.stem_iterator():
            cgg.add_virtual_residues(sm.bg, s)

        for energy in energies_to_sample:
            fud.pv('energy.eval_energy(sm, verbose=True, background=False)')
        # NOTE(review): exit status 1 after a successful evaluation looks
        # unintended; kept because callers may rely on it.
        sys.exit(1)

    if options.plot:
        plotter = fbs.StatisticsPlotter()
    else:
        plotter = None

    colors = ['g', 'y', 'r']
    samplers = []

    # parse the distances that we want to keep track of
    # (format: 'a,b' pairs separated by colons)
    to_track_dists = []
    if options.dists is not None:
        for distance_pair in options.dists.split(':'):
            # A real list, so that each pair can be read more than once.
            to_track_dists += [[int(x) for x in distance_pair.split(',')]]

    # only samples from the first energy will be saved
    silent = False

    # zip() stops at the shorter input: at most len(colors) energies are used.
    for color, energy in zip(colors, energies_to_sample):
        fud.pv('options.no_rmsd')
        stat = fbs.SamplingStatistics(sm, plotter, color, silent=silent,
                                      output_file=options.output_file,
                                      save_n_best=options.save_n_best,
                                      dists=to_track_dists,
                                      save_iterative_cg_measures=options.save_iterative_cg_measures,
                                      no_rmsd=options.no_rmsd)
        stat.step_save = options.step_save

        fud.pv('options.mcmc_sampler')
        # Every sampler works on its own copy of the structure.
        sm = fbm.SpatialModel(copy.deepcopy(bg))

        if options.mcmc_sampler:
            if options.cheating or options.no_constraint:
                sm.constraint_energy = None
                sm.junction_constraint_energy = None
            else:
                sm.constraint_energy = fbe.CombinedEnergy([fbe.CoarseStemClashEnergy(), fbe.StemVirtualResClashEnergy()])
                sm.junction_constraint_energy = fbe.RoughJunctionClosureEnergy()

            energies_to_track = []
            if options.track_energies:
                energies_to_track += [fbe.RadiusOfGyrationEnergy()]

                fud.pv('len(list(bg.hloop_iterator()))')
                # Loop-loop distances only make sense with several hairpins.
                if len(list(bg.hloop_iterator())) > 1:
                    energies_to_track += [fbe.ShortestLoopDistanceEnergy()]
                    for h in bg.hloop_iterator():
                        energies_to_track += [fbe.ShortestLoopDistancePerLoop(h)]

                energies_to_track += [fbe.AMinorEnergy(loop_type='h')]

            fud.pv('energies_to_track')
            fud.pv('energy')
            sampler = fbs.MCMCSampler(sm, energy, stat, options.stats_type, options.no_rmsd, energies_to_track=energies_to_track)
            sampler.dump_measures = options.dump_energies
            samplers += [sampler]
        else:
            sm.constraint_energy = fbe.StemVirtualResClashEnergy()
            samplers += [fbs.GibbsBGSampler(sm, energy, stat)]
        silent = True

    fud.pv('samplers')
    for i in range(options.iterations):
        if options.single_sampler:
            samplers[0].step()
        else:
            for s in samplers:
                s.step()

    if plotter:
        plotter.finish()
Ejemplo n.º 17
0
def main():
    '''
    Command-line entry point: parse the sampling options, assemble the list
    of energy functions to sample with and call predict() on every input
    structure.

    Every positional argument is a structure file. Arguments ending in '.fa'
    are read with bgs_from_fasta(), all others are loaded as
    ftmc.CoarseGrainRNA. Structure-dependent energies (per-loop energies,
    clamps, long range constraints) are always built from the first
    structure.

    Exits with status 1 if no argument is given, if no structure could be
    loaded, if a clamp target is not a known element or if a structure
    contains no stem.
    '''
    parser = optparse.OptionParser()

    parser.add_option('', '--loop-energy', dest='loop_energy', default=False, action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--dump-energies', dest='dump_energies', default=False, action='store_true', help='Dump the energies to file')
    parser.add_option('', '--track-energies', dest='track_energies', default=False, help='Track additional energy for diagnostics', action='store_true')
    parser.add_option('', '--energy-prefactor', dest='energy_prefactor', default=30, help='A multiplier for the energy', type='int')
    parser.add_option('-e', '--energy', dest='energy', default='energies/lrde.energy', help="The energy function to use when evaluating structures")
    parser.add_option('-i', '--iterations', dest='iterations', default=10000, help='Number of structures to generate', type='int')
    parser.add_option('-b', '--best_filename', dest='best_filename', default='best.coord', help="The filename to dump the best (least rmsd structure) into", type='str')
    parser.add_option('-p', '--plot', dest='plot', default=False, action='store_true', help="Plot the energies as they are encountered")
    parser.add_option('-d', '--distance', dest='distance_energy', default=False, action='store_true', help='Use the DistanceEnergy energy')
    parser.add_option('-c', '--clamp', dest='clamp', default=None, help='Clamp two elements together (i.e. add an energy with a target distance of 10 angstroms). The energy should be formatted as p1,p2:p3,p4:p5,p6 where p1 and p2 are clamped, p3 and p4 are clamped and p5 and p6 are clamped.', type='str')
    parser.add_option('-m', '--mcmc', dest='mcmc_sampler', default=True, action='store_true', help='Sample using the mcmc sampler.')
    parser.add_option('', '--rog', dest='radius_of_gyration', default=False, action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--cylinder-rog', dest='cylinder_radius_of_gyration', default=False, action='store_true', help='Use the cylinder_intersection and radius of gyration energy')
    parser.add_option('', '--aminor-perloop-rog', dest='aminor_perloop_radius_of_gyration', default=False, action='store_true', help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--specific-aminor', dest='specific_aminor', default=None, help='Use the specific aminor energy', type='str')
    parser.add_option('', '--aminor-perloop', dest='aminor_perloop', default=False, action='store_true', help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--aminor-shortestloop', dest='aminor_shortestloop', default=False, action='store_true', help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--aminor-rog', dest='aminor_radius_of_gyration', default=False, action='store_true', help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--aminor', dest='aminor', default=False, action='store_true', help='Use the aminor and radius of gyration energies')
    parser.add_option('', '--cylinder-perloop-rog', dest='cylinder_perloop_radius_of_gyration', default=False, action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--cylinder-shortestloop-rog', dest='cylinder_shortestloop_radius_of_gyration', default=False, action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--cylinder-loop-rog', dest='cylinder_loop_radius_of_gyration', default=False, action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--loop-rog', dest='loop_radius_of_gyration', default=False, action='store_true', help='Use the radius of gyration energy')
    parser.add_option('', '--constant-energy', dest='constant_energy', default=False, action='store_true', help='Use a constant energy')
    parser.add_option('', '--random-energy', dest='random_energy', default=False, action='store_true', help='Use a random energy')
    parser.add_option('', '--cylinder-loop', dest='cylinder_loop', default=False, action='store_true', help='Use the radius of gyration energy')
    parser.add_option('-y', '--cylinder-intersection', dest='cyl_intersect', default=False, action='store_true', help='Use the cylinder-intersection energy')
    parser.add_option('-g', '--cheating', dest='cheating', default=False, action='store_true', help='Use the rmsd from the real structure as the energy.')
    parser.add_option('', '--sequence-file', dest='sequence_file', default='', help='The file containing sequence for the structure. To be used with the --secondary-structure flag', type='str')
    parser.add_option('', '--sequence-str', dest='sequence_str', default='', help='The sequence of the structure. To be used with the --secondary-structure flag', type='str')
    parser.add_option('', '--eval-energy', dest='eval_energy', default=False, action='store_true', help='Evaluate the energy of the parameter')
    parser.add_option('', '--output-dir', dest='output_dir', default='.', help='Directory to store the sampled_structures', type='str')
    parser.add_option('', '--output-file', dest='output_file', default=None, help='File to output the information about the sampling to. Defaults to standard out', type=str)
    parser.add_option('', '--log-to-file', dest='log_to_file', default=False, help='Print a log of the output to a file in the directory where the best structures are stored.', action="store_true")

    parser.add_option('', '--save-n-best', dest='save_n_best', default=3, help='Save the best n structures.', type=int)
    parser.add_option('', '--step-save', dest='step_save', default=0, help="Save the structure at every n'th step.", type='int')
    parser.add_option('', '--no-background', dest='background', default=True, action='store_false', help="Don't use the background probability distribution.")
    parser.add_option('', '--stats-file', dest='stats_file',
                      default=fess.data_file('stats/combined.stats'), help='Use a different set of statistics for sampling', type='str')
    parser.add_option('', '--filtered-stats-file', dest='filtered_stats_file',
                      default=None,
                      help='Filter the statistics used for sampling using some other file.', type='str')
    parser.add_option('', '--output-dir-suffix', dest='output_dir_suffix', default=None, help="Specify an addition to the output directory", type='str')
    parser.add_option('', '--stats-type', dest='stats_type', default=None, help="Use these types of statistics.", type='str')

    parser.add_option('', '--single-sampler', dest='single_sampler',
                      default=False, help='Use only a single sampler', action='store_true')
    parser.add_option('', '--no-rmsd', dest='no_rmsd',
                      default=False, help='Refrain from trying to calculate the rmsd.', action='store_true')
    parser.add_option('', '--dists', dest='dists', default=None,
                      help="Calculate the distance between pairs of nucleotides (i.e. 14,96:14,119)",
                      type='str')
    parser.add_option('', '--save-iterative-cg-measures', dest='save_iterative_cg_measures', default=False, help='Save the coarse-grain measures every time the energy function is recalculated', action='store_true')
    parser.add_option('', '--jared-dir', dest='jared_dir', default=None, help='Use JAR3D to predict geometries for the interior loops', type='str')
    parser.add_option('', '--start-at-native', dest='start_at_native', default=False, action='store_true', help='Start at the native conformation')
    parser.add_option('', '--fix-loop', dest='fix_loop', default=None, help='Fix the correct coordinates of a particular loop to the correct ones')
    parser.add_option('', '--fix-all-loops', dest='fix_all_loops', default=False, action='store_true',  help='Fix the geometries of all loops in the structure')
    parser.add_option('', '--no-constraint', dest='no_constraint', default=False, action='store_true', help="Don't use a constraint energy")
    parser.add_option('', '--stretch', dest='stretch', default=1.0, help="Stretch RO target distribution.", type='float')
    (options, args) = parser.parse_args()

    fud.pv('options.no_rmsd')

    if len(args) < 1:
        # print() with a single string behaves the same as a Python 2 print
        # statement and as the Python 3 function
        print("Usage: ./gibbs.py temp.comp")
        print("Or ./gibb.py temp.fa. If the extension of the argument file ends in .fa, then treat it as a fasta file.")

        sys.exit(1)

    fud.pv('args')

    # load every structure named on the command line
    bgs = []
    for arg in args:
        if arg.endswith('.fa'):
            bgs += bgs_from_fasta(arg)
        else:
            bgs += [ftmc.CoarseGrainRNA(arg)]

    if not bgs:
        # the energies below are built from bgs[0], so fail with a clear
        # message instead of an IndexError further down
        sys.stderr.write("ERROR: No structures could be loaded from the input files\n")
        sys.exit(1)

    if len(bgs) > 1:
        sys.stderr.write("WARNING: More than one structure entered... only the first will be bearbeitet\n")

    # all structure-dependent energies are derived from the first structure
    first_bg = bgs[0]

    energies_to_sample = []
    if options.cyl_intersect:
        energies_to_sample += [fbe.CombinedEnergy([], [fbe.CylinderIntersectionEnergy()])]

    if options.radius_of_gyration:
        sse = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        sse.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [sse])]

    if options.constant_energy:
        ce = fbe.ConstantEnergy()
        energies_to_sample += [fbe.CombinedEnergy([], [ce])]

    if options.random_energy:
        re = fbe.RandomEnergy()
        energies_to_sample += [fbe.CombinedEnergy([], [re])]

    if options.loop_energy:
        lle = fbe.ShortestLoopDistanceEnergy()
        energies_to_sample += [fbe.CombinedEnergy([], [lle])]

    if options.aminor_shortestloop:
        nonconstraint = [fbe.ShortestLoopDistanceEnergy()]
        nonconstraint += [fbe.AMinorEnergy(loop_type='h')]
        nonconstraint += [fbe.AMinorEnergy(loop_type='i')]

        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.specific_aminor:
        nonconstraint = [fbe.RadiusOfGyrationEnergy()]

        # if we specify all, then we try and maximize the A-Minor interaction potential
        # for all internal and hairpin loops
        if options.specific_aminor == 'all':
            for d in first_bg.defines:
                if d[0] in ('i', 'h'):
                    if 'AA' in "".join(first_bg.get_define_seq_str(d)):
                        nonconstraint += [fbe.SpecificAMinorEnergy(loop_name=d, energy_prefactor=1)]
                        fud.pv('d')
        else:
            for sa in options.specific_aminor.split(','):
                nonconstraint += [fbe.SpecificAMinorEnergy(loop_name=sa, energy_prefactor=1)]

        # only hairpins with more than four residues get a per-loop distance energy
        for hloop in first_bg.hloop_iterator():
            if len(list(first_bg.define_residue_num_iterator(hloop))) > 4:
                fud.pv('hloop')
                nonconstraint += [fbe.ShortestLoopDistancePerLoop(hloop)]

        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.aminor_perloop:
        nonconstraint = []
        for hloop in first_bg.hloop_iterator():
            nonconstraint += [fbe.ShortestLoopDistancePerLoop(hloop)]

        nonconstraint += [fbe.AMinorEnergy(loop_type='h')]
        nonconstraint += [fbe.AMinorEnergy(loop_type='i')]

        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.cylinder_perloop_radius_of_gyration:
        cie = fbe.CylinderIntersectionEnergy()
        rog = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        nonconstraint = [rog, cie]

        for hloop in first_bg.hloop_iterator():
            nonconstraint += [fbe.ShortestLoopDistancePerLoop(hloop)]

        rog.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.cylinder_shortestloop_radius_of_gyration:
        cie = fbe.CylinderIntersectionEnergy()
        lle = fbe.ShortestLoopDistanceEnergy()
        rog = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        rog.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [lle, rog, cie])]

    if options.cylinder_loop_radius_of_gyration:
        cie = fbe.CylinderIntersectionEnergy()
        lle = fbe.LoopLoopEnergy()
        rog = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        rog.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [lle, rog, cie])]

    if options.cylinder_loop:
        # no radius of gyration energy is part of this combination, so there
        # is no background flag to set here (the previous code touched `sse`,
        # which is unbound unless another option happened to create it)
        lle = fbe.LoopLoopEnergy()
        cie = fbe.CylinderIntersectionEnergy()
        energies_to_sample += [fbe.CombinedEnergy([], [cie, lle])]

    if options.aminor:
        ame1 = fbe.AMinorEnergy(loop_type='h')
        ame2 = fbe.AMinorEnergy(loop_type='i')
        energies_to_sample += [fbe.CombinedEnergy([], [ame1, ame2])]

    if options.aminor_radius_of_gyration:
        sse = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        ame1 = fbe.AMinorEnergy(loop_type='h')
        ame2 = fbe.AMinorEnergy(loop_type='i')
        sse.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [sse, ame1, ame2])]

    if options.cylinder_radius_of_gyration:
        sse = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        cie = fbe.CylinderIntersectionEnergy()
        sse.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [sse, cie])]

    if options.loop_radius_of_gyration:
        sse = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor)
        sse.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], [sse, fbe.LoopLoopEnergy()])]

    if options.distance_energy:
        # use the first structure explicitly; `bg` is not guaranteed to have
        # been bound by any of the branches above
        energies_to_sample += [fbe.DistanceEnergy(first_bg.get_long_range_constraints())]

    # default energy: used when nothing else was requested, or on explicit request
    if not energies_to_sample or options.aminor_perloop_radius_of_gyration:
        rog = fbe.RadiusOfGyrationEnergy(energy_prefactor=options.energy_prefactor, adjustment=options.stretch)
        nonconstraint = [rog]

        for hloop in first_bg.hloop_iterator():
            nonconstraint += [fbe.ShortestLoopDistancePerLoop(hloop)]
        nonconstraint += [fbe.AMinorEnergy(loop_type='h')]
        nonconstraint += [fbe.AMinorEnergy(loop_type='i')]
        nonconstraint += [fbe.StemVirtualResClashEnergy()]

        rog.background = options.background
        energies_to_sample += [fbe.CombinedEnergy([], nonconstraint)]

    if options.clamp is not None:
        for p in options.clamp.split(':'):
            r1, r2 = p.split(',')

            try:
                # initially we assume the clamp target are residue numbers
                e1 = first_bg.get_node_from_residue_num(int(r1))
                e2 = first_bg.get_node_from_residue_num(int(r2))
            except ValueError:
                # ... or they are element names
                e1 = r1
                e2 = r2

            if e1 not in first_bg.defines or e2 not in first_bg.defines:
                sys.stderr.write("ERROR: Invalid values for clamping\n")
                sys.exit(1)

            if e1 == e2:
                # an element cannot be clamped to itself: report it and skip
                # the pair instead of adding a meaningless energy term
                sys.stderr.write("Can't clamp identical elements\n")
                continue

            sys.stderr.write("clamping {0}, {1}\n".format(e1, e2))
            # the first energy to sample should be a CombinedEnergy
            energies_to_sample[0].energies += [fbe.DistanceExponentialEnergy(e1, e2, 15., 1.)]

    for bg in bgs:
        options.bg_filename = args[0]
        fud.pv('energies_to_sample')

        if not list(bg.stem_iterator()):
            sys.stderr.write("Cannot simulate an open chain, the structure needs to have at least one stem\n")
            sys.exit(1)

        predict(bg, energies_to_sample, options)
Ejemplo n.º 18
0
def main():
    '''
    Command-line entry point: load the coarse-grain files given as
    arguments and print a fornac html page for them on standard output.

    For every file the extra links are extracted (the links of the first
    file serve as the reference for all later ones), and its mcc and rmsd
    are computed against the first structure. The structures are emitted
    sorted by decreasing mcc, or in the order returned by reorder_structs()
    when --sort-by is 'pca'.

    Prints the help text and exits with status 1 if no file is given.
    '''
    usage = """
    python cg_to_fornac_html.py file1.cg file2.cg

    Convert coarse grain files to html files using fornac
    to display a 2D version of the structure.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-d', '--distance', dest='distance', default=25, help="Draw links between elements that are within a certain distance from each other", type='float')
    parser.add_option('-b', '--bp-distance', dest='bp_distance', default=16, help="Draw links only between nucleotides which are so many nucleotides apart", type='int')
    parser.add_option('-s', '--sort-by', dest='sort_by', default='mcc', help="What to sort by (options: mcc, pca)", type='string')
    parser.add_option('-n', '--names', dest='names', default=False, action='store_true', help='Add the name of the structure to the display')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    structs = []
    pair_bitmaps = []
    cgs = []
    all_links = []
    mccs = []
    cm = None
    for filename in args:
        cg = ftmc.CoarseGrainRNA(filename)
        cgs += [cg]
        # build the confusion matrix once, from the first structure; compare
        # against None so the object's own truth value never matters
        if cm is None:
            cm = ftme.ConfusionMatrix(cg)
        (links, pair_bitmap) = extract_extra_links(cg, options.distance, options.bp_distance,
                                                  correct_links=all_links[0] if all_links else None)

        all_links += [links]

        pair_bitmaps += [pair_bitmap]
        mcc = ftme.mcc(cm.evaluate(cg))
        rmsd = ftme.cg_rmsd(cgs[0], cg)

        seq_struct = {"sequence": cg.seq,
                      "structure": cg.to_dotbracket_string(),
                      "extraLinks": links}

        fud.pv('options.names')
        fud.pv('mcc, rmsd')
        if options.names:
            seq_struct['name'] = op.basename(filename) + " ({:.2f},{:.1f})".format(mcc, rmsd)
        else:
            seq_struct['name'] = ''

        structs += [seq_struct]
        mccs += [mcc]

    # sys.stderr.write() and single-argument print() behave identically
    # under Python 2 and Python 3
    if options.sort_by == 'pca':
        sys.stderr.write("Sorting by pca\n")
        ix = reorder_structs(pair_bitmaps)
    else:
        sys.stderr.write("Sorting by mcc\n")
        ix = np.argsort(-np.array(mccs))

    new_array = [structs[x] for x in ix]

    print(output_template.format(json.dumps(new_array)))
Ejemplo n.º 19
0
def reconstruct_element(cg_to, cg_from, elem_to, elem_from, chain_to, chain_from, close_loop=True, reverse=False):
    '''
    Copy the residues of elem_from out of chain_from and add them to
    chain_to in place of elem_to, aligning on the adjoining elements.

    The neighboring elements need to be present in chain_to already so
    that each strand can be aligned to their starting and ending
    positions.

    The dimensions and type of elem_to and elem_from need to be identical.

    @param cg_to: The coarse-grain representation of the target chain
    @param cg_from: The coarse-grain representation of the source chain
    @param elem_to: The element to replace
    @param elem_from: The source element
    @param chain_to: The chain to graft onto
    @param chain_from: The chain to excise from
    @param close_loop: If true, pass each aligned strand through
                       align_and_close_loop() and use the chain it returns
    @param reverse: Forwarded unchanged to align_starts()
    @return: A list with one chain per strand of the element
    '''
    # the nucleotide ranges of the element, including the adjacent residues
    ranges_to = cg_to.define_range_iterator(elem_to, adjacent=True,
                                            seq_ids=True)
    ranges_from = cg_from.define_range_iterator(elem_from, adjacent=True,
                                                seq_ids=True)

    # the chains containing the aligned and loop-closed nucleotides
    new_chains = []

    # handle one strand at a time
    for range_to, range_from in zip(ranges_to, ranges_from):
        subchain = ftup.extract_subchain(chain_from, range_from[0], range_from[1])
        handle = range_to + range_from

        align_starts(chain_to, subchain, [handle], end=2, reverse=reverse)

        if close_loop:
            (r, loop_chain) = align_and_close_loop(cg_to.seq_length, chain_to,
                                                   subchain, [handle])
        else:
            r = 0.
            loop_chain = subchain

        fud.pv('elem_to, r')
        new_chains.append(loop_chain)

        residue_pairs = zip(cg_to.iterate_over_seqid_range(*range_to),
                            cg_from.iterate_over_seqid_range(*range_from))
        for position, (res_to, res_from) in enumerate(residue_pairs, 1):
            # the first nucleotide of a strand is part of the preceding
            # stem and is left out, except for 5' unpaired regions which
            # have no preceding stem
            if elem_to[0] == 'f' or position > 1:
                loop_chain[res_from].id = res_to
                add_residue_to_rosetta_chain(chain_to, loop_chain[res_from])

    return new_chains
Ejemplo n.º 20
0
def main():
    """Print a fornac html page for the coarse-grain files given as arguments.

    The first structure is the reference: its extra links are handed to
    extract_extra_links() for every later file, and mcc / rmsd are computed
    against it. Structures are emitted by decreasing mcc, or in the order
    returned by reorder_structs() when --sort-by is 'pca'.

    Prints the help text and exits with status 1 when no file is given.
    """
    usage = """
    python cg_to_fornac_html.py file1.cg file2.cg

    Convert coarse grain files to html files using fornac
    to display a 2D version of the structure.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option(
        '-d', '--distance', dest='distance', default=25, type='float',
        help="Draw links between elements that are within a certain distance from each other")
    parser.add_option(
        '-b', '--bp-distance', dest='bp_distance', default=16, type='int',
        help="Draw links only between nucleotides which are so many nucleotides apart")
    parser.add_option(
        '-s', '--sort-by', dest='sort_by', default='mcc', type='string',
        help="What to sort by (options: mcc, pca)")
    parser.add_option(
        '-n', '--names', dest='names', default=False, action='store_true',
        help='Add the name of the structure to the display')

    options, args = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    structs, pair_bitmaps, cgs, all_links, mccs = [], [], [], [], []
    cm = None
    for filename in args:
        cg = ftmc.CoarseGrainRNA(filename)
        cgs.append(cg)
        if not cm:
            # the correlation object is created from the first structure only
            cm = ftme.AdjacencyCorrelation(cg)

        # links of the first structure, once there is one
        reference_links = all_links[0] if all_links else None
        links, pair_bitmap = extract_extra_links(cg,
                                                 options.distance,
                                                 options.bp_distance,
                                                 correct_links=reference_links)
        all_links.append(links)
        pair_bitmaps.append(pair_bitmap)

        mcc = ftme.mcc(cm.evaluate(cg))
        rmsd = ftme.cg_rmsd(cgs[0], cg)

        seq_struct = {}
        seq_struct["sequence"] = cg.seq
        seq_struct["structure"] = cg.to_dotbracket_string()
        seq_struct["extraLinks"] = links

        fud.pv('options.names')
        fud.pv('mcc, rmsd')

        label = ''
        if options.names:
            label = op.basename(filename) + " ({:.2f},{:.1f})".format(mcc, rmsd)
        seq_struct['name'] = label

        structs.append(seq_struct)
        mccs.append(mcc)

    if options.sort_by == 'pca':
        print("Sorting by pca", file=sys.stderr)
        ix = reorder_structs(pair_bitmaps)
    else:
        print("Sorting by mcc", file=sys.stderr)
        ix = np.argsort(-np.array(mccs))

    # put the structures into display order
    new_array = [structs[x] for x in ix]

    print(output_template.format(json.dumps(new_array)))
Ejemplo n.º 21
0
    def coordinates_to_pymol(self, cg):
        '''
        Add the drawing primitives (segments, spheres, stem shapes) for
        the coarse-grain structure cg to this object.

        Every element in cg.coords is drawn according to the first letter
        of its name. Further sections are drawn depending on instance
        attributes: add_longrange, encompassing_stems, max_stem_distances,
        virtual_atoms, basis, energy_function and stem_stem_orientations.

        NOTE(review): the visible listing of this method stops inside the
        stem-stem orientation section; anything it does after that point is
        not covered by this documentation.

        @param cg: The coarse-grain structure to draw. The code reads
                   cg.coords, cg.defines, cg.longrange and cg.twists and
                   uses its stem/hairpin iterators.
        '''
        # names of the hairpin loops, used to decide which 'h' elements are drawn
        loops = list(cg.hloop_iterator())

        for key in cg.coords.keys():
            # when constraints are set, only the listed elements are drawn
            if self.constraints is not None:
                if key not in self.constraints:
                    continue

            # the two coordinates stored for the element
            (p, n) = cg.coords[key]
            color = self.get_element_color(key)

            if key[0] == 's':
                self.add_stem_like(cg, key)
                self.draw_bounding_boxes(cg, key)
            else:
                if key[0] == 'h':
                    # hairpins are only drawn when add_loops is set; the
                    # label is the element name followed by its length
                    if self.add_loops:
                        if key in loops:
                            self.add_segment(p, n, color, 1.0,
                                             key + " " + str(cg.get_length(key)))
                elif key[0] == 'm':
                    twists = cg.get_twists(key)

                    # check if the multiloop is longer than one. If it's not, then
                    # it has an empty define and its length is shown as 1
                    if len(cg.defines[key]) == 0:
                        self.add_segment(p, n, color, 1.0,
                                         key + " 1")
                    else:
                        self.add_segment(p, n, color, 1.0,
                                         key + " " +
                                         str(cg.defines[key][1] -
                                         cg.defines[key][0] + 1))

                    # the twist vector at each end of the multiloop segment
                    self.add_segment(p, p+ 7 * twists[0], 'light gray', 0.3)
                    self.add_segment(n, n+ 7 * twists[1], 'light gray', 0.3)

                    # the normalized average of the two twists, drawn from the midpoint
                    x = (p + n) / 2
                    t = ftuv.normalize((twists[0] + twists[1]) / 2.)
                    self.add_segment(x, x + 7 * t, 'middle gray', 0.3)
                elif key[0] == 'f':
                    if self.visualize_three_and_five_prime:
                        self.add_segment(p, n, color, 1.0,
                                         key + " " +
                                         str(cg.defines[key][1] -
                                         cg.defines[key][0] + 1) + "")

                elif key[0] == 't':
                    # NOTE(review): unlike the 'f' and 'm' branches, the length
                    # shown here has no "+ 1" -- confirm whether that is intended
                    if self.visualize_three_and_five_prime:
                        self.add_segment(p, n, color, 1.0,
                                         key + " " +
                                         str(cg.defines[key][1] -
                                         cg.defines[key][0]) + "")
                else:
                    # every other element type is drawn as a plain labelled segment
                    #self.add_stem_like(cg, key, "yellow", 1.0)
                    self.add_segment(p, n, color, 1.0, key)

        if self.add_longrange:
            # draw a dashed purple line for every long-range interaction pair
            for key1 in cg.longrange.keys():
                for key2 in cg.longrange[key1]:
                    try:

                        # a pair of points, one per element (presumably the
                        # closest points of the two segments -- confirm
                        # against cuv.line_segment_distance)
                        p = cuv.line_segment_distance(cg.coords[key1][0],
                                                      cg.coords[key1][1],
                                                      cg.coords[key2][0],
                                                      cg.coords[key2][1])
                        (point1, point2) = p

                        #point1 = cg.get_point(key1)
                        #point2 = cg.get_point(key2)

                        # each dash is followed by a gap twice its length
                        dash_length = 0.6
                        gap_length = dash_length * 2
                        direction = ftuv.normalize(point2 - point1)

                        # a float; truncated to whole dashes by int() below
                        num_dashes = ftuv.magnitude(point2 - point1) / (dash_length + gap_length)
                        fud.pv('num_dashes')

                        for i in range(int(num_dashes)):
                            self.add_segment(point1 + i * (dash_length + gap_length) * direction, 
                                             point1 + (i * (dash_length + gap_length) + dash_length) * direction, "purple",
                                             0.3, "")

                            '''
                            self.add_segment(point1, point2, "purple",
                                             0.3, key1 + " " + key2)
                            
                            '''
                    # NOTE(review): this bare except silently skips the pair on
                    # *any* error raised above, not only on missing coordinates
                    except:
                        continue

        if self.encompassing_stems:
            self.add_encompassing_cylinders(cg, 7.)

        if self.max_stem_distances > 0:
            # permutations() yields every pair of stems in both orders, so a
            # qualifying pair is drawn once per ordering
            for (s1, s2) in it.permutations(cg.stem_iterator(), r=2):
                (i1, i2) = cuv.line_segment_distance(cg.coords[s1][0],
                                                     cg.coords[s1][1],
                                                     cg.coords[s2][0],
                                                     cg.coords[s2][1])
                if cuv.magnitude(i2 - i1) < self.max_stem_distances:
                    #self.add_segment(i1, i2, 'cyan', 0.3, s1 + " " + s2)
                    self.add_segment(i1, i2, 'cyan', 0.3)

        if self.virtual_atoms:
            va = ftug.virtual_atoms(cg, sidechain=False)

            atom_width = 0.5
            # i is the rank of the residue among the sorted keys of va
            for i,r in enumerate(sorted(va.keys())):
                for a in va[r].keys():
                    if self.rainbow:
                        # color by position along the sorted residues
                        # NOTE(review): matplotlib is imported and the colormap
                        # looked up again for every atom
                        import matplotlib
                        matplotlib.use('Agg')
                        import matplotlib.pyplot as plt
                        cmap = plt.get_cmap('gist_rainbow')
                        self.add_sphere(va[r][a], 
                                        color_rgb = cmap(i / float(len(va.keys()))), 
                                        width=atom_width)
                    else:
                        # color by the type of the element the residue belongs
                        # to; residues in other element types are not drawn
                        d = cg.get_node_from_residue_num(r)
                        if d[0] == 's':
                            self.add_sphere(va[r][a], 'green', width=atom_width)
                        elif d[0] == 'i':
                            self.add_sphere(va[r][a], 'yellow', width=atom_width)
                        elif d[0] == 'm':
                            self.add_sphere(va[r][a], 'red', width=atom_width)
                        elif d[0] == 'h':
                            self.add_sphere(va[r][a], 'blue', width=atom_width)

        if self.basis:
            # draw the second basis vector of every element's coordinate system
            for d in cg.defines.keys():
                origin, basis = ftug.element_coord_system(cg, d)

                self.add_segment(origin, origin + 7. * basis[1], 'purple', 2.)

        print >>sys.stderr, "energy_function:", self.energy_function
        # print the contributions of the energy function, if one is specified
        if self.energy_function is not None:
            print >>sys.stderr, "key"
            sum_energy = 0.

            e_func = self.energy_function
            e_func_iter = e_func.interaction_energy_iter(cg, background=False)
            int_energies = list(e_func_iter)
            # the (interaction, energy) entry with the largest energy
            # NOTE(review): max() raises ValueError if int_energies is empty
            max_energy = max(int_energies, key=lambda x: x[1])
            print >>sys.stderr, "max_energy:", max_energy

            for (interaction, energy) in int_energies:
                (p, n) = (cg.get_point(interaction[0]),
                          cg.get_point(interaction[1]))
                # energy relative to the maximum, so the width below is 3 for
                # the largest energy and smaller for all others
                scaled_energy = - max_energy[1] + energy

                self.add_segment(p, n, 'purple', 3 * np.exp(scaled_energy))

                # accumulated but not read in the visible part of the method
                sum_energy += energy

        if self.stem_stem_orientations is not None:
            for (s1, s2) in it.permutations(cg.stem_iterator(), 2):
                '''
                if cg.are_adjacent_stems(s1, s2):
                    continue
                '''

                # NOTE(review): hard-coded filter -- only pairs that involve
                # the stem named 's65' are processed; looks like leftover
                # debugging code
                if s1 != 's65':
                    if s2 != 's65':
                        continue

                # axis vectors of the two stems
                s1_vec = cg.coords[s1][1] - cg.coords[s1][0]
                s2_vec = cg.coords[s2][1] - cg.coords[s2][0]
                (i1, i2) = cuv.line_segment_distance(cg.coords[s1][0],
                                                     cg.coords[s1][1],
                                                     cg.coords[s2][0],
                                                     cg.coords[s2][1])
                # vector from the point on s1 to the point on s2
                i_vec = i2 - i1

                #i_rej will be orthogonal to s1_vec in the direction
                #of s2
                i_rej = cuv.vector_rejection(i_vec, s1_vec)

                #plane_vec will be orthogonal to s1_vec and to the direction
                # of s2
                plane_vec = np.cross(i_rej, s1_vec)

                # s2_proj is in the intersection plane
                s2_proj_in = cuv.vector_rejection(s2_vec, plane_vec)
                # s2 proj_out is out of the intersection plane
                #s2_proj_out = cuv.vector_rejection(s2_vec, i_rej)

                start_point = cg.coords[s1][0] + 5 * cg.twists[s1][0]
                ortho_offset = cuv.magnitude(i_rej)
                # the small constant presumably keeps the sqrt argument below
                # from going negative through rounding -- TODO confirm
                dist = cuv.magnitude(i_vec) + 0.0001

                # the part of the separation that is not orthogonal to s1
                lateral_offset = m.sqrt(dist ** 2 - ortho_offset ** 2)

                # skip stem pairs whose lateral offset is more than 10
                if lateral_offset > 10:
                    continue

                '''
                #self.add_segment(start_point,
                                  start_point + 10 * cuv.normalize(s2_vec),
                                  'white', 0.5)
                #self.add_segment(start_point,
                                  start_point + 5 * cuv.normalize(plane_vec),
                                  'magenta', 0.5)
                #self.add_segment(start_point,
                                  start_point + 5 * cuv.normalize(i_vec),
                                  'cyan', 0.5)
                #self.add_segment(i1, i1 + i_rej,  'cyan', 0.5)
                '''
                # the direction of s2 within the plane, drawn from a point offset
                # from the start of s1 along its first twist vector
                self.add_segment(start_point,
                                 start_point + 7 * cuv.normalize(s2_proj_in),
                                 'white', 1.5)
                '''
Ejemplo n.º 22
0
def main():
    '''
    Command line entry point: show coarse-grain RNA structures in pymol.

    Every positional argument is loaded with cmg.CoarseGrainRNA, converted
    to pymol drawing commands by a cvp.PymolPrinter configured from the
    command line options, and handed to a pymol subprocess. If no positional
    argument is given the help is printed and the process exits with
    status 1.
    '''
    usage = """
    ./visualize_cg.py cg_file

    Display the coarse-grain representation of a structure in pymol.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-g', '--highlight', dest='highlight', default=None,
                      help="Highlight some elements", type='str')
    parser.add_option('-o', '--output', dest='output', default=None,
                      help="Create a picture of the scene and exit",
                      type='str')
    parser.add_option('-r', '--longrange', dest='longrange', default=False,
                      action='store_true',
                      help="Display long-range interactions")
    parser.add_option('-l', '--loops', dest='loops', default=True,
                      action='store_false',
                      help="Don't display the coarse-grain hairpin loops")
    parser.add_option('-c', '--cones', dest='cones', default=False,
                      action='store_true',
                      help="Display cones that protrude from the stems")
    parser.add_option('-x', '--text', dest='text', default=False,
                      action='store_true', help="Add labels to the figure.")
    parser.add_option('-a', '--align', dest='align', default=False,
                      action='store_true',
                      help='Align all of the structures with the first')
    parser.add_option('-e', '--encompassing-stems', dest='encompassing_stems',
                      default=False, action='store_true',
                      help='Show the big stems that encompass the colinear '
                           'ones.')
    parser.add_option('-v', '--virtual-atoms', dest='virtual_atoms',
                      default=False, action='store_true',
                      help='Display the virtual atoms')
    parser.add_option('-d', '--distance', dest='distance', default=None,
                      help="Draw the lines between specified virtual residues")
    parser.add_option('-b', '--basis', dest='basis', default=False,
                      action='store_true',
                      help='Display the coordinate basis of each element')
    parser.add_option('--batch', dest='batch', default=False,
                      action='store_true', help='Start pymol in batch mode')
    parser.add_option('--sidechain-atoms', dest='sidechain_atoms',
                      default=False, action='store_true',
                      help='Include the sidechain atoms. Automatically '
                           'enables --virtual-atoms')
    # Adjacent literals instead of a backslash continuation: the latter
    # embedded the source indentation in the help text.
    parser.add_option('--rainbow', dest='rainbow', default=False,
                      action='store_true',
                      help='Color each of the nucleotide positions (i.e. '
                           'average atoms) according to the colors of '
                           'the rainbow and their position')
    parser.add_option('--only-elements', dest='only_elements', default=None,
                      help='Display only these elements '
                           'element names should be '
                           'separated by commas')
    parser.add_option('--color-gradual', dest='color_gradual', default=None,
                      help='Color the specified elements '
                           'gradually from one to the other, '
                           'example (i1,i4,m1)',
                      type='str')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    # Transfer the display switches onto the printer.
    pp = cvp.PymolPrinter()
    pp.add_loops = options.loops
    pp.draw_cones = options.cones
    pp.add_longrange = options.longrange
    pp.print_text = options.text
    pp.encompassing_stems = options.encompassing_stems
    pp.virtual_atoms = options.virtual_atoms
    pp.sidechain_atoms = options.sidechain_atoms
    pp.basis = options.basis
    pp.rainbow = options.rainbow

    if options.only_elements is not None:
        pp.only_elements = options.only_elements.split(',')

    cgs = [cmg.CoarseGrainRNA(a) for a in args]

    if options.align:
        align_cgs(cgs)

    if options.color_gradual is not None:
        pp.element_specific_colors = dict()
        # imported lazily so matplotlib is only needed for this option
        import matplotlib.pyplot as plt
        cmap = plt.get_cmap('coolwarm')

        # everything is black unless it is one of the requested elements
        for d in cgs[0].defines:
            pp.element_specific_colors[d] = 'black'

        to_color_nodes = options.color_gradual.split(',')
        for i, node in enumerate(to_color_nodes):
            color = cmap(i / float(len(to_color_nodes)))
            print("%s %s" % (node, color))
            pp.element_specific_colors[node] = color

    for i, cg in enumerate(cgs):
        if i > 0:
            # every structure after the first gets a modified color
            pp.color_modifier = .3

        pp.coordinates_to_pymol(cg)

    # Highlights and distances are drawn on the last structure given
    # (args is non-empty here, so cgs[-1] exists).
    last_cg = cgs[-1]

    # highlight things in purple
    if options.highlight is not None:
        pp.add_twists = False
        for s in options.highlight.split(','):
            fud.pv('s')
            pp.add_stem_like(last_cg, s, color='purple', width=3.)

    # display the distances between nucleotides; the expected format is
    # "from,to:from,to:..."
    if options.distance is not None:
        virtual_atoms = ftug.virtual_atoms(last_cg, sidechain=False)

        for dist_pair in options.distance.split(':'):
            fud.pv('dist_pair')
            fr, to = dist_pair.split(',')

            pp.add_dashed(virtual_atoms[int(fr)]["C1'"],
                          virtual_atoms[int(to)]["C1'"], width=1.2)

    # Text mode so that writing str works on Python 2 and Python 3 alike.
    with tf.NamedTemporaryFile(mode='w') as f:
        with tf.NamedTemporaryFile(mode='w', suffix='.pml') as f1:
            f.write(pp.pymol_string())
            f.flush()

            pymol_cmd = 'hide all\n'
            pymol_cmd += 'run %s\n' % (f.name)
            pymol_cmd += 'show cartoon, all\n'
            pymol_cmd += 'bg white\n'
            pymol_cmd += 'clip slab, 10000\n'
            pymol_cmd += 'orient\n'

            if options.output is not None:
                # render the scene to a file and leave pymol again
                pymol_cmd += 'ray\n'
                pymol_cmd += 'png %s\n' % (options.output)
                pymol_cmd += 'quit\n'

            f1.write(pymol_cmd)
            f1.flush()

            print("f1.name: %s" % (f1.name,))

            if options.batch:
                pymol_args = ['pymol', '-cq', f1.name]
            else:
                pymol_args = ['pymol', f1.name]

            # The temporary files must stay open until pymol has finished,
            # hence the subprocess is awaited inside the with blocks.
            p = sp.Popen(pymol_args, stdout=sp.PIPE, stderr=sp.PIPE)
            _, err = p.communicate()
            sys.stderr.write("err: %s\n" % (err,))
Ejemplo n.º 23
0
def json_to_fasta(rna_json_str):
    '''
    Convert an RNA json as returned by fasta_to_json into fasta strings
    (which will later be used to create a BulgeGraph and then another json).

    The nodes are first grouped into molecules: two nodes belong to the same
    molecule if they are connected through 'backbone' or 'label_link' links.
    'basepair' and 'pseudoknot' links within one molecule become entries of
    that molecule's pair table; such links between two different molecules
    are reported separately.

    :param rna_json_str: A json string representation of an RNA as returned by
                         fasta_to_json. It must contain a 'nodes' and a
                         'links' list; the 'source' and 'target' of a link
                         are node dictionaries.
    :return: A tuple (all_fastas, all_xs, all_ys, all_uids,
             different_tree_links). The first four are lists with one entry
             per molecule containing at least one nucleotide: the fasta
             string, the x coordinates and the y coordinates (nucleotides
             followed by labels) and the uids of the nucleotides.
             different_tree_links is a list of ((x, y), (x, y)) pairs, one
             for each pairing link that connects two different molecules.
    '''
    rna_json = json.loads(rna_json_str)

    # store the pair tables for each molecule separately
    pair_list = col.defaultdict(list)
    node_list = col.defaultdict(list)
    label_list = col.defaultdict(list)

    # make dictionaries hashable, it's ok here because it will only be used
    # for the nodes and the links and their values don't change
    class hashabledict(dict):
        def __hash__(self):
            return hash(tuple(sorted(self.items())))

    hashable_nodes = [hashabledict(n) for n in rna_json['nodes']]
    all_nodes = set(n for n in hashable_nodes if n['node_type'] != 'pseudo')

    # undirected adjacency list over the links that hold a molecule together
    links_dict = col.defaultdict(list)
    for link in rna_json['links']:
        if link['link_type'] in ('backbone', 'label_link'):
            source = hashabledict(link['source'])
            target = hashabledict(link['target'])
            links_dict[source].append(target)
            links_dict[target].append(source)

    # store which molecule each node is in
    nodes_to_trees = dict()

    # calculate the list of trees in the forest of RNA molecules
    # trees correspond to individual molecules
    # different trees do not share backbone bonds
    while all_nodes:
        # if there's nodes left, then there's a new tree to be made
        to_visit = [next(iter(all_nodes))]
        curr_tree = set()

        while to_visit:
            curr_node = to_visit.pop()
            if curr_node not in all_nodes:
                # A node can be queued more than once (cycles, duplicated
                # links); it must only be processed the first time.
                continue

            all_nodes.remove(curr_node)
            curr_tree.add(curr_node)

            for neighbor in links_dict[curr_node]:
                if neighbor in all_nodes:
                    to_visit.append(neighbor)

        # Freeze the finished tree once so it can be used as a dictionary
        # key below without being rebuilt for every link and node.
        frozen_tree = frozenset(curr_tree)
        for member in curr_tree:
            nodes_to_trees[member] = frozen_tree

    different_tree_links = []
    for link in rna_json['links']:
        # only consider base-pair links
        if link['link_type'] not in ('basepair', 'pseudoknot'):
            continue

        from_node = hashabledict(link['source'])
        to_node = hashabledict(link['target'])
        from_tree = nodes_to_trees[from_node]

        # every tree is represented by exactly one frozenset object
        if from_tree is nodes_to_trees[to_node]:
            # record the pair in both directions
            pair_list[from_tree] += [
                (int(from_node['id']), int(to_node['id'])),
                (int(to_node['id']), int(from_node['id']))
            ]
        else:
            sys.stderr.write("Different trees\n")
            different_tree_links += [((from_node['x'], from_node['y']),
                                      (to_node['x'], to_node['y']))]

    # list the nodes in each molecule
    for node in hashable_nodes:
        if node['node_type'] == 'nucleotide':
            node_list[nodes_to_trees[node]] += [
                (node['id'], node['name'], node['x'], node['y'],
                 node['struct_name'], node['uid'])
            ]
        elif node['node_type'] == 'label':
            sys.stderr.write("adding label\n")
            label_list[nodes_to_trees[node]] += [(node['x'], node['y'])]

    all_fastas = []
    all_xs = []
    all_ys = []
    all_uids = []

    # NOTE: the local names used below are evaluated by name in the
    # fud.pv() debugging calls and therefore must not be renamed.
    for key in node_list:
        pair_table = fus.tuples_to_pairtable(pair_list[key],
                                             len(node_list[key]))
        dotbracket = fus.pairtable_to_dotbracket(pair_table)

        seq = "".join(n[1] for n in node_list[key])
        fud.pv('seq')
        fud.pv('len(seq)')
        fud.pv('len(label_list[key])')

        # nucleotide coordinates first, then the label coordinates
        all_xs += [[n[2]
                    for n in node_list[key]] + [n[0] for n in label_list[key]]]
        all_ys += [[n[3]
                    for n in node_list[key]] + [n[1] for n in label_list[key]]]

        fud.pv('len(all_xs[-1])')

        all_uids += [[n[5] for n in node_list[key]]]

        # the molecule is named after the struct_name of its first nucleotide
        all_fastas += [
            ">{}\n{}\n{}".format(node_list[key][0][4], seq, dotbracket)
        ]
        fud.pv('all_fastas')

    return (all_fastas, all_xs, all_ys, all_uids, different_tree_links)
Ejemplo n.º 24
0
def main():
    '''
    Command line entry point: compare helix-helix orientation statistics.

    Loads the angle statistics from 'fess/stats/real.stats', collects the
    (u, v) orientation values for each selected loop size, clusters the loop
    sizes by the largest nearest-neighbour distance between their point sets
    and shows the dendrogram. Afterwards the orientations for two hard-coded
    loop sizes are plotted on a unit sphere, which is either saved to the
    file given by --fig-name or shown on screen.
    '''
    usage = """
    ./helix_orienation_divergences.py

    Analyze how much the helix-helix orientations diverge between two data sets.
    """
    num_args = 0
    parser = OptionParser(usage=usage)

    parser.add_option('-r',
                      '--resolution',
                      dest='resolution',
                      default=10,
                      help="The resolution of the resulting plot",
                      type='int')
    parser.add_option('-a',
                      '--angle',
                      dest='angle',
                      default=0,
                      help="The angle of the camera",
                      type='float')
    parser.add_option(
        '-f',
        '--fig-name',
        dest='fig_name',
        default='',
        help=
        "The name of the file to save the figure to. If it is not specified, the figure will not be saved",
        type='str')
    parser.add_option('-i',
                      '--interior_loops',
                      dest='interior_loops',
                      default=False,
                      help='Cluster only the interior loops',
                      action='store_true')
    parser.add_option('-m',
                      '--multi_loops',
                      dest='multi_loops',
                      default=False,
                      help='Cluster only the multiloops',
                      action='store_true')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    real_stats = ftms.ConformationStats('fess/stats/real.stats').angle_stats
    # NOTE(review): the sampled statistics are loaded but not used anywhere
    # below; the load is kept because removing it would change which files
    # have to exist -- confirm whether it can go.
    sampled_stats = ftms.ConformationStats('fess/stats/temp.stats').angle_stats

    # count how many statistics we have for each statistic type
    stat_counts = c.defaultdict(int)
    for sc in real_stats.keys():
        stat_counts[sc] += len(real_stats[sc])

    histograms = dict()
    # Iterate over a snapshot of the keys: stat_counts is a defaultdict that
    # is handed to a helper inside the loop and may grow on lookups.
    for b in list(stat_counts.keys()):
        if b[2] != 2.:
            # only look at type 2 angles
            continue

        # a dimension of 1000 is what the two filters below tell apart
        if options.interior_loops:
            if b[0] == 1000 or b[1] == 1000:
                continue
        if options.multi_loops:
            if b[0] != 1000 and b[1] != 1000:
                continue

        (selected_sizes,
         count) = get_nearest_dimension_sizes(b, stat_counts, 1)

        if count < 3:
            continue

        fud.pv('b, selected_sizes')

        combined_real = []

        # get the statistics that correspond to the selected sizes
        for ss in selected_sizes:
            ss_r = real_stats[ss]

            combined_real += list(ss_r[['u', 'v']].as_matrix())

        # the raw (u, v) points are stored, not a binned histogram
        histograms[b] = np.array(combined_real)

    def nearest_distances(points, others):
        # For each point, the distance to its closest point in others.
        return [min(ftuv.magnitude(p - q) for q in others) for p in points]

    # One fixed key order for both the pairwise distances and the labels.
    labels = list(histograms.keys())

    dists = []
    named_dists = dict()
    pp_dists = dict()
    for k1, k2 in it.combinations(labels, 2):
        # nearest-neighbour distances in both directions; their maximum is
        # used as the distance between the two point sets
        per_point_distances = (nearest_distances(histograms[k1],
                                                 histograms[k2]) +
                               nearest_distances(histograms[k2],
                                                 histograms[k1]))

        dists += [max(per_point_distances)]
        named_dists[(k1, k2)] = max(per_point_distances)
        pp_dists[(k1, k2)] = per_point_distances

    # NOTE: the local names passed to fud.pv() are evaluated by name and
    # therefore must not be renamed.
    fud.pv('dists')
    Z = sch.complete(dists)
    fud.pv('Z')
    sch.dendrogram(Z, labels=labels, leaf_rotation=90)
    plt.subplots_adjust(bottom=0.25)

    plt.show()

    # the two loop sizes that are compared in detail
    k1 = (6, 7, 2)
    k2 = (5, 6, 2)

    rs = get_certain_angle_stats(real_stats, k1)
    ss = get_certain_angle_stats(real_stats, k2)

    fud.pv('named_dists[(k1,k2)]')
    fud.pv('pp_dists[(k1,k2)]')

    real_us = rs[['u', 'v']].as_matrix()
    sampled_us = ss[['u', 'v']].as_matrix()

    U_r = real_us[:, 0]
    V_r = real_us[:, 1]

    U_s = sampled_us[:, 0]
    V_s = sampled_us[:, 1]

    total_r = len(U_r)
    total_s = len(U_s)

    hr = np.histogram2d(U_r, V_r)
    hs = np.histogram2d(U_s, V_s)

    # add a pseudo-count of one to every bin and normalize each histogram
    # by the size of its own data set
    pseudo_r = (hr[0] + 1) / total_r
    pseudo_s = (hs[0] + 1) / total_s
    # NOTE(review): a Kullback-Leibler divergence would be p * log(p / q);
    # this computes p * (p / q) -- confirm which one is intended.
    kl = pseudo_r * (pseudo_r / pseudo_s)
    fud.pv('kl')
    fud.pv('sum(map(sum, kl))')

    # spherical (u, v) -> cartesian coordinates on the unit sphere
    X_r = np.sin(U_r) * np.cos(V_r)
    Y_r = np.sin(U_r) * np.sin(V_r)
    Z_r = np.cos(U_r)

    r = 1.
    X_s = r * np.sin(U_s) * np.cos(V_s)
    Y_s = r * np.sin(U_s) * np.sin(V_s)
    Z_s = r * np.cos(U_s)

    fud.pv('real_us')

    print("%d %d" % (len(real_us), len(sampled_us)))

    fig = plt.figure(figsize=(10, 10))
    ax = Axes3D(fig)

    # an arrow along the x-axis as a reference direction
    a = Arrow3D([-1.3, 1.3], [0, 0], [0, 0],
                mutation_scale=20,
                lw=5,
                arrowstyle="-|>",
                color="g")
    ax.add_artist(a)

    ax.plot(X_r, Y_r, Z_r, 'bo', alpha=0.3)
    ax.plot(X_s, Y_s, Z_s, 'ro', alpha=0.3)

    # wireframe of the unit sphere the points lie on
    u, v = np.mgrid[0:2 * np.pi:20j, 0:np.pi:10j]
    x = np.cos(u) * np.sin(v)
    y = np.sin(u) * np.sin(v)
    z = np.cos(v)
    ax.plot_wireframe(x, y, z, color="y")

    ax._axis3don = False
    ax.set_zlim3d(-1, 1)
    ax.w_zaxis.set_major_locator(LinearLocator(6))
    ax.view_init(0, options.angle)

    if options.fig_name != "":
        plt.savefig(options.fig_name, bbox_inches='tight')
    else:
        plt.show()