Ejemplo n.º 1
0
def fasta_to_positions(fasta_text):
    """
    Compute 2D plot coordinates for the structure described by a fasta text.

    @param fasta_text: The fasta-like string that is handed to
                       fgb.from_fasta_text.
    @return: A list of (x, y) pairs, one per character of the dotbracket
             string of the parsed structure.
    """
    # Keep the parsed graph: the dotbracket string is read from it below.
    bg = fgb.from_fasta_text(fasta_text)
    bp_string = bg.to_dotbracket_string()

    # Debug output on stderr; written this way so that it works on both
    # Python 2 and Python 3.
    sys.stderr.write('bp_string {}\n'.format(bp_string))
    # NOTE(review): this sets a module-wide variable of the RNA package;
    # presumably it selects the layout algorithm -- confirm against the
    # RNA package documentation.
    RNA.cvar.rna_plot_type = 1
    coords = RNA.get_xy_coordinates(bp_string)
    xs = np.array([coords.get(i).X for i in range(len(bp_string))])
    ys = np.array([coords.get(i).Y for i in range(len(bp_string))])

    # Materialize the pairs so that callers get a reusable list on Python 3
    # as well (zip is a one-shot iterator there).
    return list(zip(xs, ys))
Ejemplo n.º 2
0
    def test_angle_stat_get_angle_from_cg_1(self):
        """
        Both angle stats of the interior loop "i0" must report an angle of
        180 degrees for the hand-placed stem coordinates used below.
        """
        fasta = """>1
        AAACCGGGCCCCCCAAUUU
        (((..(((...)))..)))
        """

        graph = fgb.from_fasta_text(fasta)
        rna = ftmc.from_bulge_graph(graph)

        # Both stems get coordinate pairs on the z axis, s1 directly above s0.
        rna.coords["s0"] = (np.array([0., 0., 0.]), np.array([0., 0., 1.]))
        rna.twists["s0"] = (np.array([0., -1., 0]), np.array([0., 1., 0.]))

        rna.coords["s1"] = (np.array([0., 0., 2.]), np.array([0., 0., 3.]))
        rna.twists["s1"] = (np.array([-1., 0., 0.]), np.array([1., 0., 0.]))

        rna.coords["h0"] = (np.array([0, 1, 3]), np.array([0, 2, 4]))
        # The coordinates of the loop between the stems are derived from
        # the stem coordinates set above.
        rna.add_bulge_coords_from_stems()

        print(rna.coords["i0"])
        print(rna.twists)

        first_stat, second_stat = rna.get_bulge_angle_stats("i0")

        for stat in (first_stat, second_stat):
            self.assertAlmostEqual(stat.get_angle(), math.radians(180))
Ejemplo n.º 3
0
def fasta_to_json(fasta_text, circular=False):
    """
    Build the d3 compatible graph representation for a fasta-like text
    consisting of an id line, a sequence and a dotbracket string, e.g.:

        >id
        ACCCGGGG
        (((..)))

    @param fasta_text: The fasta string.
    @param circular: Passed through unchanged to bg_to_json.
    @return: The result of bg_to_json for the parsed bulge graph.
    """
    return bg_to_json(fgb.from_fasta_text(fasta_text), circular=circular)
Ejemplo n.º 4
0
def json_to_json(rna_json_str):
    '''
    Convert an RNA json string to fasta file, then to a bulge_graph
    and then back to a json.

    The purpose is to maintain the integrity of the molecule and to
    maintain the positions of all the hidden nodes after modification.

    @param rna_json_str: The json string, as accepted by json_to_fasta.
    @return: A dictionary with the keys 'nodes' and 'links' holding the
             merged nodes and links of all molecules.
    '''
    (all_fastas, all_xs, all_ys, all_uids, different_tree_links) = json_to_fasta(rna_json_str)
    big_json = {'nodes': [], 'links': []}

    coords_to_index = dict()
    for fasta_text, xs, ys, uids in zip(all_fastas, all_xs, all_ys, all_uids):
        bg = fgb.from_fasta_text(fasta_text)
        new_json = bg_to_json(bg, xs=xs, ys=ys, uids=uids)

        # The nodes of this molecule are appended after all nodes collected
        # so far, so every index local to new_json is shifted by this amount.
        offset = len(big_json['nodes'])

        for link in new_json['links']:
            # the indices of the new nodes will be offset, so the links
            # have to have their node pointers adjusted as well
            link['source'] += offset
            link['target'] += offset
            big_json['links'].append(link)

        # Create a mapping between the coordinates of a node and its index
        # in the node list. To be used when creating links between different
        # molecules, which are stored according to the coordinates of the nodes
        # being linked
        for i, n in enumerate(new_json['nodes']):
            if n['node_type'] == 'nucleotide':
                coords_to_index[(n['x'], n['y'])] = i + offset

        big_json['nodes'].extend(new_json['nodes'])

    # add the links that are between different molecules
    for dtl in different_tree_links:
        n1 = coords_to_index[dtl[0]]
        n2 = coords_to_index[dtl[1]]

        big_json['links'].append({'source': n1, 'target': n2,
                                  'link_type': 'basepair', 'value': 1})

    return big_json
Ejemplo n.º 5
0
 def test_radius_of_gyration_no_stems(self):
     """The radius of gyration of a structure without any stem is NaN."""
     rna = ftmc.from_bulge_graph(fgb.from_fasta_text("AUCG\n...."))
     rna.coords["f0"] = [0, 0, 0.], [12., 1, 1]
     radius = rna.radius_of_gyration()
     self.assertTrue(math.isnan(radius))
Ejemplo n.º 6
0
def main():
    """
    Print a reduced graph representation of a secondary structure.

    The structure is read from the fasta file given as the single positional
    command line argument, or from stdin if that argument is a dash (-).
    Three things are printed: a string with one character per traversed
    element ('(' or ')' for stems, '.' for elements whose name starts with
    'i'), the comma separated element names, and one line per element with
    its character, name and sequence.

    Exits with status 1 after printing the help text if the positional
    argument is missing.
    """
    usage = """
    python fasta_to_reduced_graph.py fasta_file

    Generate a reduced graph representation of this secondary structure.
    fasta_file should be either a filename pointing to a fasta file
    or a dash (-) indicating that the input should come from stdin.
    """
    # The fasta_file argument is mandatory: args[0] is read below.
    num_args = 1
    parser = OptionParser(usage=usage)

    (_, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    if args[0] == '-':
        text = sys.stdin.read()
    else:
        with open(args[0], 'r') as f:
            text = f.read()

    # load a BulgeGraph from a fasta-like file
    # i.e.
    # >test
    # AACCGG
    # ((..))
    bg = fgb.from_fasta_text(text)

    prev = None
    seen = set()
    symbols = []
    elements_traversed = []

    for i in range(1, bg.seq_length + 1):
        # iterate over each nucleotide and get the name of the element
        # that it is part of
        node = bg.get_node_from_residue_num(i)

        if node == prev:
            # we've already seen this element
            continue
        if node[0] == 's':
            # a stem seen before only needs its bracket closed,
            # a new stem opens a bracket
            symbols.append(')' if node in seen else '(')
            elements_traversed.append(node)
        elif node[0] == 'i':
            symbols.append('.')
            elements_traversed.append(node)

        # Elements of any other type produce no output, but they still
        # count as the previous element and as seen.
        prev = node
        seen.add(node)

    out_str = ''.join(symbols)
    print(out_str)
    print(",".join(elements_traversed))

    for t, e in zip(out_str, elements_traversed):
        # print the sequences of each element in the reduced representation
        print(t, e, bg.get_define_seq_str(e))
Ejemplo n.º 7
0
def load_rna(filename, rna_type="any", allow_many=True, pdb_chain=None,
             pbd_remove_pk=True, pdb_dotbracket="",
             dissolve_length_one_stems = True):
    """
    Load one or more RNA structures from a file or from a dotbracket string.

    The file type is detected with sniff_filetype and the file is then
    parsed with the loader matching that type.

    :param filename: The path of the file to load. If it consists only of
                     the characters ``.()[]{}&`` and rna_type is "any",
                     it is parsed as a dotbracket string instead of
                     being opened as a file.
    :param rna_type: One of "any", "cg", "only_cg", "3d" and "pdb"

                     *  "any": Return either BulgeGraph or CoarseGrainRNA objects,
                            depending on the input format
                     *  "cg":  Always convert to CoarseGrainRNA objects,
                            even if they have no 3D information
                     *  "only_cg": Only accept cg-files.
                     *  "3d":  Return CoarseGrainRNA objects,
                            if the file contains 3D information,
                            raise an error otherwise
                     *  "pdb": only accept pdb files
    :param allow_many: If True, return a list. If False raise an error, if more than one RNA is present.
    :param pdb_chain: Extract the given chain from the file.
                      Only applicable if filename corresponds to a pdb file
    :param pbd_remove_pk: Detect pseudoknot-free structures from the pdb.
                      (The parameter name really is spelled "pbd".)
                      If pdb_chain is given, pseudoknots are only removed
                      when pdb_dotbracket is empty.
    :param pdb_dotbracket: Only applicable, if filename corresponds to a pdb file and pdb_chain is given.
                      It is passed on as the secondary structure.
    :param dissolve_length_one_stems: Passed on to the loaders.
                      Ignored if input is in forgi bg/cg format.

    :returns: A list of RNAs if allow_many is True, a single RNA otherwise.
              If the detected file type is none of the types handled below,
              the function falls off the end and returns None.
    :raises WrongFileFormat: If the file type does not fit rna_type,
              if 3D coordinates are required but missing, if the file is
              a MMCIF file, or if allow_many is False but more than one
              RNA was found.
    :raises ValueError: If pdb_dotbracket is given without pdb_chain
              for a pdb file.
    """
    # Is filename a dotbracket string and not a filename?
    if all( c in ".()[]{}&" for c in filename):
        # A dotbracket-string was provided via the commandline
        if not rna_type=="any":
            # No early return here: execution continues below and tries
            # to open filename as a file.
            warnings.warn("Cannot treat '{}' as dotbracket string, since we need a sequence. "
                          "Trying to treat it as a filename instead...".format(filename))
        else:
            log.info("Assuming RNA %s is a dotbracketstring and not a file.", filename)
            bg = fgb.from_fasta_text(filename, dissolve_length_one_stems=dissolve_length_one_stems)
            if allow_many:
                return [bg]
            else:
                return bg
    with open(filename) as rnafile:
        filetype = sniff_filetype(rnafile)
    # Reject files whose type contradicts the requested rna_type
    # before any parsing is attempted.
    if rna_type=="pdb" and filetype!="pdb":
        raise WrongFileFormat("Only PDB files are accepted, but file {} has type {}.".format(filename, filetype))
    if rna_type=="only_cg" and filetype!="forgi":
        raise WrongFileFormat("Only forgi cg files are accepted, but file {} has type {}.".format(filename, filetype))
    if filetype=="forgi":
        cg = ftmc.CoarseGrainRNA(filename)
        if rna_type in ["3d", "only_cg"] and not cg.coords.is_filled:
            raise WrongFileFormat("File {} does not contain all 3D coordinates!".format(filename))
        if allow_many:
            return [cg]
        else:
            return cg
    elif filetype=="pdb":
        if pdb_chain:
            # A user-supplied dotbracket string takes precedence over
            # pseudoknot removal.
            cgs = [ftmc.load_cg_from_pdb(filename, chain_id=pdb_chain,
                                                 remove_pseudoknots=pbd_remove_pk and not pdb_dotbracket,
                                                 secondary_structure=pdb_dotbracket, dissolve_length_one_stems=dissolve_length_one_stems)]
            if dissolve_length_one_stems:
                for cg in cgs:
                    cg.dissolve_length_one_stems()
        else:
            if pdb_dotbracket:
                raise ValueError("pdb_dotbracket requires a chain ti be given to avioid ambiguity.")
            cgs = ftmc.connected_cgs_from_pdb(filename, remove_pseudoknots = pbd_remove_pk,
                                              dissolve_length_one_stems=dissolve_length_one_stems)
        if allow_many:
            return cgs
        else:
            if len(cgs)>1:
                raise WrongFileFormat("More than one connected RNA component in pdb file {}.".format(filename))
            return cgs[0]
    elif filetype=="mmcif":
        raise WrongFileFormat("MMCIF files are not yet supported.")
    elif filetype=="bpseq":
        if rna_type=="3d":
            raise WrongFileFormat("bpseq file {} is not supported. We need 3D coordinates!".format(filename))
        bg = fgb.BulgeGraph()
        with open(filename, 'r') as f:
            text = f.read()
            try:
                int(text[0])
            except ValueError:
                # The text does not start with a digit: skip everything
                # before the first line that starts with "1 ".
                # If no such line exists, find returns -1 and the text
                # is left unchanged.
                i=text.find("\n1 ")
                text=text[i+1:]
        bg.from_bpseq_str(text, dissolve_length_one_stems=dissolve_length_one_stems)
        if rna_type=="cg":
            bg = ftmc.from_bulge_graph(bg)
        if allow_many:
            return [bg]
        else:
            return bg
    elif filetype =="fasta" or filetype=="other":
        if rna_type=="3d":
            raise WrongFileFormat("Fasta(like) file {} is not supported. We need 3D coordinates!".format(filename))
        try:
            bgs = fgb.from_fasta(filename, dissolve_length_one_stems=dissolve_length_one_stems)
        except Exception as e:
            # Log additional context, then re-raise the original
            # exception unchanged.
            with log_to_exception(log, e):
                log.critical("Could not parse file %r.", filename)
                if filetype=="other":
                    log.critical("We assumed file %r to be some fasta-variant or dotbracket file, but an error occurred during parsing.", filename)
            raise
        # from_fasta may return a single BulgeGraph; normalize to a list.
        if isinstance(bgs, fgb.BulgeGraph):
            bgs = [bgs]
        if dissolve_length_one_stems:
            for bg in bgs:
                bg.dissolve_length_one_stems()
        if rna_type=="cg":
            bgs = list(map(ftmc.from_bulge_graph, bgs))
        if allow_many:
            return bgs
        else:
            if len(bgs)>1:
                raise WrongFileFormat("More than one RNA found in fasta/ dotbracket file {}.".format(filename))
            return bgs[0]