예제 #1
0
def main():
    """Load the input RNA, prepare the output directory and start sampling.

    Reads the command line via the module-level ``parser``. Optionally loads
    a reference structure (``--reference``) and marks residues as externally
    interacting. Entries of the comma-separated ``externally_interacting``
    option may be nucleotide numbers or coarse-grain element names; for an
    element name the first entry of its define is used.

    :raises ValueError: If the RNA has fewer than 2 stems.
    :raises KeyError: If an element name given for external interaction is
                      not present in ``cg.defines``.
    """
    args = parser.parse_args()

    cg, = fuc.cgs_from_args(
        args, rna_type="cg",
        enable_logging=True)  # Set log-level as a side effect

    if args.reference:
        # The reference is loaded through the same helper by temporarily
        # pointing args.rna at the reference file.
        args.rna = [args.reference]
        reference_cg, = fuc.cgs_from_args(args,
                                          rna_type="cg",
                                          enable_logging=False)
    else:
        reference_cg = None
    if args.externally_interacting:
        nts = []
        for pos in args.externally_interacting.split(","):
            try:
                nt = int(pos)
            except ValueError:
                # Not a number, so interpret the entry as an element name.
                d = cg.defines[pos]  # Might raise a KeyError, but that is fine
                if d:
                    nts.append(d[0])
                else:
                    log.warning(
                        "Not setting %s to externally interacting, because it contains 0 nucleotides.",
                        pos)
            else:
                nts.append(nt)
        cg.interacting_residues.extend([cg.seq.to_resid(nt) for nt in nts])
        log.info(
            "The RNA now has the following elements not perticipating in interaction energies (if presenty): %s",
            cg.interacting_elements)

    if len(list(cg.stem_iterator())) < 2:
        raise ValueError(
            "No sampling can be done for structures with fewer than 2 stems")

    with fess.directory_utils.make_outdir(args, cg) as main_dir:
        # Keep a copy of the (possibly modified) input next to the results.
        cg_stri = cg.to_cg_string()
        with open(os.path.join(main_dir, 'input.cg'), "w") as f:
            print(cg_stri, file=f)
        try:
            run(args, cg, main_dir, reference_cg)
        except BaseException as e:
            # Record every failure (including interrupts) in the output
            # directory, then let it propagate unchanged.
            with open(os.path.join(main_dir, 'exception.log'), "w") as f:
                print("Running on python {}, the following error occurred:".
                      format(sys.version),
                      file=f)
                print("{}: {}".format(type(e).__name__, str(e)), file=f)
                print(str(traceback.format_exc()), file=f)
            raise
예제 #2
0
def main(parser):
    """Print the RMSD between the 2D projections of two RNA structures.

    The two projection directions are read from ``args.directions`` as
    comma-separated numbers. With ``args.plot`` set, both projections are
    additionally drawn into one matplotlib figure (green and red).

    :param parser: The argument parser used to read the command line.
    """
    args = parser.parse_args()

    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(args,
                                     nargs=2,
                                     rna_type="3d",
                                     enable_logging=True)

    def _flat_coords(projection):
        # Concatenate the coordinates of all elements in sorted key order, so
        # that both structures contribute their points in the same order.
        points = []
        for key in sorted(projection._coords.keys()):
            points.extend(projection._coords[key])
        return np.array(points)

    first_direction = np.array(args.directions[0].split(","), dtype=float)
    second_direction = np.array(args.directions[1].split(","), dtype=float)

    first_projection = ftmp.Projection2D(cg1, first_direction)
    second_projection = ftmp.Projection2D(cg2, second_direction)

    first_points = _flat_coords(first_projection)
    second_points = _flat_coords(second_projection)

    print(ftms.rmsd(first_points, second_points))
    if not args.plot:
        return

    # matplotlib is only needed (and imported) when plotting is requested.
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    first_projection.plot(ax, line2dproperties={"color": "green"})
    second_projection.plot(ax, line2dproperties={"color": "red"})
    plt.show()
예제 #3
0
def main(args):
    """Compare two 3D RNA structures and print the selected measures.

    If none of ``args.acc``, ``args.rmsd`` and ``args.pdb_rmsd`` is set, all
    measures are attempted and those that cannot be computed are skipped
    silently. A measure that was requested explicitly but cannot be computed
    prints a message and exits with status 1.

    :param args: The parsed command-line arguments.
    """
    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(
            args, rna_type="3d", enable_logging=True)

        # No explicit selection means: show everything that is available.
        showall = not (args.acc or args.rmsd or args.pdb_rmsd)

        if showall or args.acc:
            if cg1.defines == cg2.defines:
                adj = ftms.AdjacencyCorrelation(cg1)
                print("ACC:\t{:.3f}".format(ftms.mcc(adj.evaluate(cg2))))
            elif args.acc:
                print(
                    "Cannot compare two 3d structures that do not correspond to the same RNA.")
                sys.exit(1)

        if showall or args.rmsd:
            print("RMSD:\t{:.3f}".format(ftms.cg_rmsd(cg1, cg2)))

        if showall or args.pdb_rmsd:
            pdb_rmsd_ok = pdb_rmsd(cg1, cg2)
            # If --pdb-rmsd was not given, a failure is simply not reported.
            # If it was given, we exit with non-zero exit status.
            if args.pdb_rmsd and not pdb_rmsd_ok:
                print(
                    "Cannot calculate PDB-RMSD: The two files do not contain the same chains.")
                sys.exit(1)
예제 #4
0
def main(args):
    """Print dot-bracket strings with multiloop-adjacent stem positions opened.

    For every multiloop and every stem connected to one of its 'm' elements
    (each stem only once per multiloop), a fresh copy of the dot-bracket
    string is printed in which the two positions reported by
    ``get_side_nucleotides`` for the multiloop-facing side of the stem are
    replaced by '.'.

    :param args: The parsed command-line arguments.
    """
    with fuc.hide_traceback():
        bg, = fuc.cgs_from_args(args, "any", enable_logging=True)

    multiloops, _ = bg.find_multiloop_loops()
    for loop_elements in multiloops:
        handled_stems = set()
        for elem in loop_elements:
            # Only multiloop segments are of interest here.
            if elem[0] == 'm':
                for stem in list(bg.edges[elem]):
                    if stem in handled_stems:
                        continue
                    handled_stems.add(stem)

                    # Start from the unmodified structure for every stem.
                    dotbracket = list(bg.to_dotbracket_string())

                    # The side of the stem which is connected to the
                    # multiloop segment.
                    side_begin, side_end = bg.get_sides(stem, elem)

                    # The nucleotides that need to be changed; the list index
                    # is the reported position minus one.
                    positions = bg.get_side_nucleotides(stem, side_begin)
                    dotbracket[positions[0] - 1] = '.'
                    dotbracket[positions[1] - 1] = '.'
                    print("".join(dotbracket))
예제 #5
0
def main():
    """Load all input RNAs, write them to the output directory and sample.

    Every input structure is stored as ``input<i>.cg`` in the output
    directory before ``run`` is called. Any exception raised by ``run`` is
    written to ``exception.log`` in that directory and then re-raised.

    :raises ValueError: If any input RNA has fewer than 2 stems.
    """
    args = parser.parse_args()

    # Loading the structures also sets the log-level as a side effect.
    cgs = fuc.cgs_from_args(args, rna_type="cg", enable_logging=True)

    for rna in cgs:
        stem_count = len(list(rna.stem_iterator()))
        if stem_count < 2:
            raise ValueError(
                "No sampling can be done for structures with fewer than 2 stems"
            )

    with fess.directory_utils.make_outdir(args, cgs[0]) as main_dir:
        for index, rna in enumerate(cgs):
            target = os.path.join(main_dir, 'input{}.cg'.format(index))
            serialized = rna.to_cg_string()
            with open(target, "w") as out:
                print(serialized, file=out)
        try:
            run(args, cgs, main_dir)
        except BaseException as e:
            log_path = os.path.join(main_dir, 'exception.log')
            with open(log_path, "w") as out:
                header = "Running on python {}, the following error occurred:".format(
                    sys.version)
                print(header, file=out)
                summary = "{}: {}".format(type(e).__name__, str(e))
                print(summary, file=out)
                print(str(traceback.format_exc()), file=out)
            raise
예제 #6
0
def main(args):
    """Print ACC, RMSD and PDB-RMSD between two 3D RNA structures.

    Without any of ``args.acc``, ``args.rmsd`` or ``args.pdb_rmsd`` all three
    measures are attempted; measures that cannot be calculated are then
    omitted. An explicitly requested measure that cannot be calculated leads
    to a message and exit status 1.

    :param args: The parsed command-line arguments.
    """
    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(args, rna_type="3d", enable_logging=True)

        explicit_choice = args.acc or args.rmsd or args.pdb_rmsd
        showall = not explicit_choice

        if showall or args.acc:
            same_rna = cg1.defines == cg2.defines
            if same_rna:
                adj = ftms.AdjacencyCorrelation(cg1)
                print("ACC:\t{:.3f}".format(ftms.mcc(adj.evaluate(cg2))))
            elif args.acc:
                # Only complain if the ACC was asked for explicitly.
                print(
                    "Cannot compare two 3d structures that do not correspond to the same RNA."
                )
                sys.exit(1)

        if showall or args.rmsd:
            print("RMSD:\t{:.3f}".format(ftms.cg_rmsd(cg1, cg2)))

        if showall or args.pdb_rmsd:
            calculated = pdb_rmsd(cg1, cg2)
            # If --pdb-rmsd was not given, just don't print it.
            # If it was given, we exit with non-zero exit status.
            if not calculated and args.pdb_rmsd:
                print(
                    "Cannot calculate PDB-RMSD: The two files do not contain the same chains."
                )
                sys.exit(1)
예제 #7
0
def main(parser):
    """Project two RNAs to 2D and print the RMSD between the projections.

    ``args.directions`` holds one comma-separated projection direction per
    structure. If ``args.plot`` is set, both projections are drawn into a
    single matplotlib axes object, the first in green and the second in red.

    :param parser: The argument parser used to read the command line.
    """
    args = parser.parse_args()

    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(
            args, rna_type="3d", enable_logging=True)

    direction_a = np.array(args.directions[0].split(","), dtype=float)
    direction_b = np.array(args.directions[1].split(","), dtype=float)

    projection_a = ftmp.Projection2D(cg1, direction_a)
    projection_b = ftmp.Projection2D(cg2, direction_b)

    # Collect the projected points element by element in sorted key order.
    points_a = []
    for key in sorted(projection_a._coords.keys()):
        points_a.extend(projection_a._coords[key])
    points_a = np.array(points_a)

    points_b = []
    for key in sorted(projection_b._coords.keys()):
        points_b.extend(projection_b._coords[key])
    points_b = np.array(points_b)

    print(ftms.rmsd(points_a, points_b))
    if args.plot:
        # Imported lazily, because it is only required for plotting.
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        projection_a.plot(ax, line2dproperties={"color": "green"})
        projection_b.plot(ax, line2dproperties={"color": "red"})
        plt.show()
예제 #8
0
def main():
    """Create the A-Minor training data and tune the classifier.

    Builds the command-line parser, loads all given coarse-grain files,
    writes the geometry file (``--trainingsdata-out``) from the FR3D result
    and stores the tuned hyper-parameters as JSON (``--model-params-out``).
    """
    parser = fuc.get_rna_input_parser(
        "Train a classifier for A-Minior interactions.",
        nargs="+",
        rna_type="only_cg")
    parser.add_argument("--fr3d-result",
                        type=str,
                        required=True,
                        help="A file containing the FR3D output")
    parser.add_argument("--fr3d-query",
                        type=str,
                        help="Add this string describing the FR3D query "
                        "as a comment to the trainingsdata-out file")
    parser.add_argument("--chain-id-mapping-dir",
                        type=str,
                        help="If you use PDB-bundles, this directory "
                        "needs to hold all chain-id-mapping.txt files.")
    parser.add_argument("--trainingsdata-out",
                        type=str,
                        default="forgi/threedee/data/aminor_geometries.csv",
                        help="File that will be written for the geometries "
                        "of interactions and non-interactions.")
    parser.add_argument("--model-params-out",
                        type=str,
                        default="forgi/threedee/data/aminor_params.json",
                        help="File that will be written for the "
                        "model's hyper-parameters.")
    parser.add_argument("--test-set",
                        type=str,
                        help="':'-separated PDB-ids"
                        " for the test-set.")
    # Adjacent literals are concatenated verbatim, so every sentence needs
    # its own trailing space to keep the help text readable.
    parser.add_argument("--train-set",
                        type=str,
                        help="':'-separated PDB-ids for the train-set. "
                        "Note: This is only used for cross-validation. "
                        "The final model will be trained on all the data.")

    args = parser.parse_args()
    cgs, cg_filenames = fuc.cgs_from_args(args,
                                          rna_type="only_cg",
                                          return_filenames=True)
    # tune_model reads the geometry file back from disk, so it has to be
    # written first.
    ftcta.create_geometry_file(args.trainingsdata_out, cgs, cg_filenames,
                               args.fr3d_result, args.chain_id_mapping_dir,
                               args.fr3d_query)
    hyper_params = ftcta.tune_model(args.trainingsdata_out, args.train_set,
                                    args.test_set)
    with open(args.model_params_out, "w") as f:
        json.dump(hyper_params, f)
예제 #9
0
 def __init__(self, ref_fn, name, args):
     """
     Load the reference structure that stems are compared against.

     If the reference cannot be loaded, the error is logged, the instance is
     marked as silent and no reference is stored.

     :param ref_fn: A file name of an RNA.
     :param name: If given (truthy), used as suffix of the column header.
     :param args: The parsed command-line arguments; ``args.rna`` is
                  overwritten with the reference file name.
     """
     super(StemRMSD, self).__init__()
     try:
         args.rna = [ref_fn]
         loaded = fuc.cgs_from_args(args,
                                    rna_type="cg",
                                    enable_logging=False)
         # Exactly one structure is expected for the reference.
         ref_structure, = loaded
     except Exception:
         log.exception("Cannot create StemRMSD.")
         self.silent = True
         return
     else:
         self._reference = ref_structure
     if name:
         self.header = ["stem_rmsd_of_" + name]
     self.has_logged = False
예제 #10
0
def main(args):
    """Reconstruct all-atom structures for the given coarse-grain files.

    Before reconstruction, the PDB/CIF files referenced by the sampled
    fragments of all inputs are counted, and those used by more than one
    input (at most the 250 most common) are preloaded into the
    reconstructor. Errors during the reconstruction of a single structure
    are logged and do not stop the remaining structures.

    :param args: The parsed command-line arguments.
    """
    rec = fbr.Reconstructor(args.source_pdb_dir, args.source_cg_dir, args.server)
    with fuc.hide_traceback():  # Applies only to WrongFileFormat
        cgs, fns = fuc.cgs_from_args(args, rna_type="only_cg", enable_logging=True, return_filenames=True)
    # Preprocessing
    most_common_pdbs = collections.Counter()
    cif_suffix = ".cif"
    for cg in cgs:
        sm = fbm.SpatialModel(cg)
        sm.load_sampled_elems(None)
        curr_fns = set()
        for stat in sm.elem_defs.values():
            stat_name = stat.pdb_name
            pdb_basename = stat_name.split(":")[0]
            pdb_filename = op.expanduser(op.join(rec.pdb_library_path, "_".join(pdb_basename.split("_")[:-1]) + cif_suffix))
            try:
                with open(pdb_filename):
                    pass
            except IOError:
                # Fall back to the .pdb file. The suffix is sliced off,
                # because str.rstrip(".cif") would strip any trailing
                # '.', 'c', 'i' or 'f' characters and mangle the name.
                pdb_filename = pdb_filename[:-len(cif_suffix)] + ".pdb"
            curr_fns.add(pdb_filename)
        # Count every file at most once per input structure.
        most_common_pdbs.update(curr_fns)
    for fn, count in most_common_pdbs.most_common(250):
        if count == 1:
            # most_common is sorted, so all remaining files are used only once.
            break
        print("Preloading {}, used {} times".format(fn, count))
        rec.get_pdb(fn, True)
    print("Preloading of most common PDBs done")
    logging.getLogger("forgi").setLevel(logging.ERROR)
    logging_exceptions.config_from_args(args)
    for i, cg in enumerate(cgs):
        # Look the name up outside the try block, so the error message below
        # can never report the file name of a previous iteration.
        fn = fns[i]
        try:
            reconstruct(cg, fn, args, rec)
        except Exception as e:
            logging_exceptions.log_exception(e)
            log.exception("During reconstruction of cg %s, an error occurred: %s", fn, e)
    # NOTE(review): not used by any code visible here; kept in case
    # following code relies on it.
    i = 0
예제 #11
0
def main():
    """Create the A-Minor training data and tune the classifier.

    Builds the command-line parser, loads all given coarse-grain files,
    writes the geometry file (``--trainingsdata-out``) from the FR3D result
    and stores the tuned hyper-parameters as JSON (``--model-params-out``).
    """
    parser = fuc.get_rna_input_parser("Train a classifier for A-Minior interactions.",
                                      nargs="+", rna_type="only_cg")
    parser.add_argument("--fr3d-result", type=str, required=True,
                        help="A file containing the FR3D output")
    parser.add_argument("--fr3d-query", type=str,
                        help="Add this string describing the FR3D query "
                             "as a comment to the trainingsdata-out file")
    parser.add_argument("--chain-id-mapping-dir", type=str,
                        help="If you use PDB-bundles, this directory "
                             "needs to hold all chain-id-mapping.txt files.")
    parser.add_argument("--trainingsdata-out", type=str,
                        default="forgi/threedee/data/aminor_geometries.csv",
                        help="File that will be written for the geometries "
                             "of interactions and non-interactions.")
    parser.add_argument("--model-params-out", type=str,
                        default="forgi/threedee/data/aminor_params.json",
                        help="File that will be written for the "
                             "model's hyper-parameters.")
    parser.add_argument("--test-set", type=str, help="':'-separated PDB-ids"
                                                     " for the test-set.")
    # Adjacent literals are concatenated verbatim, so every sentence needs
    # its own trailing space to keep the help text readable.
    parser.add_argument("--train-set", type=str,
                        help="':'-separated PDB-ids for the train-set. "
                             "Note: This is only used for cross-validation. "
                             "The final model will be trained on all the data.")

    args = parser.parse_args()
    cgs, cg_filenames = fuc.cgs_from_args(args, rna_type="only_cg",
                                          return_filenames=True)
    # tune_model reads the geometry file back from disk, so it has to be
    # written first.
    ftcta.create_geometry_file(args.trainingsdata_out, cgs, cg_filenames,
                               args.fr3d_result, args.chain_id_mapping_dir,
                               args.fr3d_query)
    hyper_params = ftcta.tune_model(
        args.trainingsdata_out, args.train_set, args.test_set)
    with open(args.model_params_out, "w") as f:
        json.dump(hyper_params, f)
예제 #12
0
    #parser.add_argument('--ame-pdb-id-file', help='For the AMinor energy, only consider pdb ids from this file', type=str)
    parser.add_argument('--precalculated-ame-orient',
                        default=False,
                        action="store_true")
    return parser


parser = get_parser()
if __name__ == "__main__":
    args = parser.parse_args()

    #Logging
    logging.basicConfig(
        format="%(levelname)s:%(name)s:%(funcName)s[%(lineno)d]: %(message)s")
    logging.captureWarnings(True)
    cgs = fuc.cgs_from_args(args, "+")

    log = logging.getLogger(__name__)
    log.info(fau.get_version_string())

    if args.use_subgraphs is None:
        use_subgraphs = max(1, math.ceil(10000 / len(cg_files)))
    else:
        use_subgraphs = args.use_subgraphs

    if not args.plot_only:
        #Generating the files

        ## ROG
        if args.rog_target_file:
            fbe.RadiusOfGyrationEnergy.generate_target_distribution(
예제 #13
0
def main(parser):
    """Print atom positions relative to the coordinate system of each element.

    For every coarse-grain element of every input PDB structure, the
    positions of the backbone, chi-torsion and side-chain atoms of its
    residues are transformed into the element's coordinate system and
    printed as ``identifier:x,y,z``. The identifier consists of the element
    type, the element dimensions, the connection type, the index of the
    residue within the element and the atom name.

    :param parser: The argument parser used to read the command line.
    """
    args = parser.parse_args()

    poss = c.defaultdict(list)
    sources = c.defaultdict(list)

    cgs = fuc.cgs_from_args(args, rna_type="pdb", enable_logging=True)
    for rna_index, cg in enumerate(cgs):

        if len(list(cg.stem_iterator())) == 0:
            log.warning("Skipping RNA %s (%s): no stems", rna_index,
                        cg.pdb_name)
            continue

        for d in cg.defines:
            if np.allclose(cg.coords[d][0], cg.coords[d][1]):
                log.warning(
                    "Skipping element %s of RNA %s (%s): degenerate coordinates.",
                    d, rna_index, cg.pdb_name)
                continue

            origin, basis = ftug.element_coord_system(cg, d)

            if d[0] == 'i' or d[0] == 'm':
                conn = cg.connections(d)
                conn_type = cg.connection_type(d, conn)
            else:
                conn_type = 0

            # The residue index gets its own name: re-using the RNA index
            # here would corrupt the warnings logged for later elements.
            for res_index, r in enumerate(cg.define_residue_num_iterator(d)):

                # add only the base atoms which are relevant to the calculation
                # of the chi torsion angle
                seq_id = cg.seq_ids[r - 1]
                residue = cg.chains[seq_id.chain][seq_id.resid]
                resname = residue.resname.strip()

                if resname not in ftup.chi_torsion_atoms:
                    print("Unknown nucleotide name:", resname, file=sys.stderr)
                    continue

                atoms = ftup.nonsidechain_atoms + \
                    ftup.chi_torsion_atoms[resname][-2:]
                scatoms = ftup.side_chain_atoms[resname]
                for aname in atoms + scatoms:
                    try:
                        a = cg.chains[seq_id.chain][seq_id.resid][aname]
                    except KeyError:
                        # missing an atom
                        continue

                    # The C1'->B1 and B1->B2 vectors define the plane of the base
                    # The O4'->C1'->B1->B2 sequence defines the torsion
                    # angle chi
                    if aname == ftup.chi_torsion_atoms[resname][-2]:
                        aname = 'B1'
                    elif aname == ftup.chi_torsion_atoms[resname][-1]:
                        aname = 'B2'
                    elif aname in scatoms:
                        # Side-chain atom names are only unique per base type.
                        aname = resname + "." + aname
                    avec = a.get_vector().get_array()
                    atom_pos = ftuv.change_basis(avec - origin, basis,
                                                 ftuv.standard_basis)
                    identifier = "%s %s %d %d %s" % (d[0], " ".join(
                        map(str, cg.get_node_dimensions(d))), conn_type,
                        res_index, aname)
                    poss[identifier].append(atom_pos)
                    sources[identifier].append(d)

                    print("{}:{}".format(identifier,
                                         ",".join(map(str, atom_pos))))
예제 #14
0
    out_column = []
    for i in range(len(data[sorting_column])):
        if only_for_col is None or data[only_for_col][i]:
            out_column.append(sorted_vals.index(data[sorting_column][i]) + 1)
        else:
            out_column.append(0)
    log.info("number_by column is %s, len(data[%s])=%s)", out_column,
             sorting_column, len(data[sorting_column]))
    return out_column


parser = generateParser()
if __name__ == "__main__":
    args = parser.parse_args()
    cgs, filenames = fuc.cgs_from_args(args,
                                       "any",
                                       enable_logging=True,
                                       return_filenames=True)
    data = defaultdict(list)

    if args.distances:
        dist_pairs = str(args.distances).split(str(':'))
        dist_pairs = [x.split(",") for x in dist_pairs]

    else:
        dist_pairs = []
    if args.angles:
        angle_pairs = str(args.angles).split(str(":"))
        angle_pairs = [x.split(",") for x in angle_pairs]
    else:
        angle_pairs = []
    for i, cg in enumerate(cgs):
예제 #15
0
                         "The file-name will be the RNA's name (if present), otherwise 'rna001' etc.")
# Adjacent string literals are concatenated verbatim, so every sentence of a
# help text carries its own trailing space.
parser.add_argument("--filename", type=str,
                    help="If this is present, --to-file will automatically be true. "
                         "A target filename (or path) without extention. "
                         "If it is a filename, use the given filename instead of the RNA's name. "
                         "If more than one input-RNA is present, appends automatically a increasing number. "
                         "If it is a directory, create files in this directory.")
parser.add_argument("-f", "--force", action="store_true",
                    help="Overwrite files, if they already exist. Note: In case of race conditions, "
                         "files could be overwritten even if this flag is not provided.")


if __name__ == "__main__":
    args = parser.parse_args()
    with fuc.hide_traceback():
        target_rna_type = FILETYPES[args.target_type].rna_type
        cgs = fuc.cgs_from_args(args, rna_type=target_rna_type)

    # Split --filename into a target directory and an optional file name.
    # Giving --filename always implies --to-file.
    if not args.filename:
        filename = None
        directory = ""
    else:
        args.to_file = True
        if not os.path.isdir(args.filename):
            directory, filename = os.path.split(args.filename)
        else:
            # An existing directory: files are named after the RNAs.
            directory = args.filename
            filename = None
            args.filename = None
    for i, cg in enumerate(cgs):
        # On stdout, consecutive RNAs are separated by a visible rule.
        if i > 0 and not args.to_file:
            print("\n\n========================================\n\n")
예제 #16
0
def main():
    """Identify and classify pseudoknots in the given RNA structures.

    For every input RNA the pseudoknots are identified and classified, a
    summary is printed, and one row per RNA and one row per pseudoknot are
    collected. Both tables are written as tab-separated files into the
    current directory; ``--outfile-mode a`` appends without writing a header.
    RNAs that raise a GraphIntegrityError or GraphConstructionError are
    logged and skipped; any other error is logged and re-raised.
    """
    parser = fuc.get_rna_input_parser("Find pseudoknots in RNA structures, "
                                      "classify them into shapes and analyze "
                                      "their 3D architecturre.", "+",
                                      parser_kwargs={"conflict_handler": "resolve"})
    # --pseudoknots is always enabled below; it is re-declared here only to
    # hide it from the help output.
    parser.add_argument("--pseudoknots", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("--outfile-mode", choices=["w", "a"], default='w',
                        help="Overwrite ('w') or append ('a') to output file")
    parser.add_argument("--minlength", type=int,
                        help="Minimum length of each stem. "
                             "Stems with fewer base-pairs are treated as unpaired.",
                        default=2)

    args = parser.parse_args()
    args.pseudoknots = True
    rnas, filenames = fuc.cgs_from_args(args, rna_type="any", return_filenames=True, skip_errors=True)

    # variables for statistics
    unfold = 0
    without_pk = 0
    with_pk = 0
    pseudoknot_dataset = []
    pseudoknot_dataset_extended = defaultdict(list)

    for pos, rna in enumerate(rnas):
        try:
            # The running counters are passed in and returned updated.
            without_pk, with_pk, unfold, pseudoknots = identification_pseudoknot(
                rna, without_pk, with_pk, unfold, args.minlength)

            pk_classes, other = classify_pseudoknots(pseudoknots)

            print("pk_classes:")
            for key, pks in sorted(pk_classes.items()):
                print("{:<17s} {}\t{}".format(key, len(pks), ", ".join(map(str, pks))))
            print("other:" + "\t" + ", ".join(map(str, other)))

            filename = str(filenames[pos]).split("/")[-1]

            # One summary row per RNA: the number of pseudoknots per class.
            entry = {}
            for key, pks in pk_classes.items():
                entry[key] = len(pks)
            entry["other"] = len(other)
            entry["PK_other_structures"] = ",".join(map(str, other))
            entry["filename"] = filename
            entry["rnaname"] = rna.name
            print(filename)
            pseudoknot_dataset.append(entry)

            # One detailed row per pseudoknot, numbered consecutively
            # within this RNA.
            pk_id = 0
            for key, pks in pk_classes.items():
                for pk in pks:
                    pk_id += 1
                    extend_pk_description(pseudoknot_dataset_extended, filename,
                                          key, rna, pk, pk_id)

            for pk in other:
                pk_id += 1
                extend_pk_description(pseudoknot_dataset_extended, filename,
                                      "other", rna, pk, pk_id)
        except GraphIntegrityError:
            log.exception("Ignoring RNA %s; GraphIntegrityError", rna.name)
        except GraphConstructionError:
            log.exception("Ignoring RNA %s; GraphConstructionError", rna.name)
        except Exception:
            log.error("Error processing %s", rna.name)
            raise

    # When appending, the header already exists in the file.
    write_header = args.outfile_mode != "a"
    df1 = pandas.DataFrame(pseudoknot_dataset)
    df1.to_csv("pseudoknot_identification_genus2.csv", mode=args.outfile_mode, header=write_header, sep="\t")

    df2 = pandas.DataFrame(pseudoknot_dataset_extended)
    df2.to_csv("pseudoknot_identification_extended_genus2.csv", mode=args.outfile_mode, header=write_header, sep="\t")

    print("Structures with Pseudoknots: {}".format(with_pk))
    print("Structures without Pseudoknots: {}".format(without_pk))
    print("Structures unfold: {}".format(unfold))
예제 #17
0
        "--fragment-dir",
        type=str,
        help=
        "A directory, where pdb fragments will be stored. If None is given, they will not be stored."
    )
    return parser


parser = generateParser()

if __name__ == "__main__":
    next_id = defaultdict(int)
    args = parser.parse_args()
    cgs, filenames = fuc.cgs_from_args(args,
                                       "+",
                                       "pdb",
                                       enable_logging=True,
                                       return_filenames=True,
                                       skip_errors=True)

    for cg, fn in zip(cgs, filenames):
        if sys.stderr.isatty():
            print(cg.name, file=sys.stderr)
        cg.add_all_virtual_residues()
        for elem in cg.defines:
            if elem in cg.incomplete_elements:
                continue
            base_name = "{}:{}_".format(cg.name, elem[0])
            idnr = next_id[base_name]
            next_id[base_name] += 1
            name = base_name + str(idnr)
            if args.fragment_dir:
예제 #18
0
    sorted_vals = list(sorted(set(df[sorting_column])))
    out_column = []
    for i in range(len(data[sorting_column])):
        if only_for_col is None or data[only_for_col][i]:
            out_column.append(sorted_vals.index(data[sorting_column][i]) + 1)
        else:
            out_column.append(0)
    log.info("number_by column is %s, len(data[%s])=%s)", out_column, sorting_column, len(
        data[sorting_column]))
    return out_column


parser = generateParser()
if __name__ == "__main__":
    args = parser.parse_args()
    cgs, filenames = fuc.cgs_from_args(args,
                                       "any",
                                       enable_logging=True,
                                       return_filenames=True)
    data = defaultdict(list)

    # --distances and --angles are ':'-separated lists of ','-separated
    # groups; each group becomes a list of strings.
    dist_pairs = []
    if args.distances:
        dist_pairs = [
            group.split(",")
            for group in str(args.distances).split(str(':'))
        ]

    angle_pairs = []
    if args.angles:
        angle_pairs = [
            group.split(",")
            for group in str(args.angles).split(str(":"))
        ]

    for i, cg in enumerate(cgs):
        # Files are numbered starting from 1.
        file_num = i + 1
예제 #19
0
    "A target filename (or path) without extention. "
    "If it is a filename, use the given filename instead of the RNA's name. "
    "If more than one input-RNA is present, appends automatically a increasing number."
    "If it is a directory, create files in this directory.")
parser.add_argument("-f", "--force", action="store_true",
                    help="Overwrite files, if they already exist. Note: In case of race conditions, "
                         "files could be overwritten even if this flag is not provided.")

if __name__ == "__main__":
    args = parser.parse_args()
    with fuc.hide_traceback():
        wanted_rna_type = FILETYPES[args.target_type].rna_type
        cgs = fuc.cgs_from_args(args, rna_type=wanted_rna_type)

    # Derive the output directory and (optional) file name from --filename.
    # Giving --filename always implies --to-file.
    if not args.filename:
        filename = None
        directory = ""
    else:
        args.to_file = True
        if not os.path.isdir(args.filename):
            directory, filename = os.path.split(args.filename)
        else:
            # An existing directory: files are named after the RNAs.
            directory = args.filename
            filename = None
            args.filename = None
    for i, cg in enumerate(cgs):
        # On stdout, consecutive RNAs are separated by a visible rule.
        if i > 0 and not args.to_file:
            print("\n\n========================================\n\n")
예제 #20
0
def main(args):
    """Visualize one or more 3D RNA structures in PyMOL.

    Writes the coarse-grain representation of all RNAs as a PyMOL script,
    exports the atomic coordinates of every RNA that has chains as a CIF
    file, builds a ``.pml`` command file (named selections per element,
    optional highlighting of ``--only-elements``, optional ray-traced PNG
    output) and finally starts PyMOL on these files.

    :param args: The parsed command-line arguments.
    :raises ValueError: If an entry of ``--labels`` is not of the form
                        ``element:label``.
    """
    rnas = fuc.cgs_from_args(args, '+', '3d')
    pp = pymol_printer_from_args(args)

    if args.align:
        align_rnas(rnas)
    if args.labels:
        labels = {}
        for label in args.labels.split(","):
            if not label:
                continue
            try:
                elem, lab = label.split(':')
            except ValueError:
                raise ValueError(
                    "Please specify --labels with as list of colon-seperated tuples. Found invalid entry {}."
                    .format(repr(label)))
            labels[elem] = lab
        if not pp.print_text:
            # Text output was off: show only the requested labels and an
            # empty label for every other element.
            labels = defaultdict(lambda: "", labels)
            pp.print_text = True
    else:
        labels = {}

    color_modifier = 1.0
    log.info("Visualizing %s rnas", len(rnas))
    for rna in rnas:
        pp.add_cg(rna, labels, color_modifier)
        # Every further RNA gets a smaller color modifier.
        color_modifier *= 0.7

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        if args.pymol_file:
            stru_filename = args.pymol_file
        else:
            stru_filename = os.path.join(tmpdir, "structure")
        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())

        pdb_fns = []
        selections = ""
        for i, rna in enumerate(rnas):
            if not rna.chains:
                continue
            obj_name = "pdb{}_{}".format(i, rna.name.replace("-", "_"))
            fn = os.path.join(tmpdir, obj_name + ".cif")
            pdb_fns.append(fn)
            ftup.output_multiple_chains(rna.chains.values(), fn, "cif")
            for d in rna.defines:
                resids = list(
                    rna.define_residue_num_iterator(d, seq_ids=True))
                if not resids:
                    continue
                sel = []
                for chain in {r.chain for r in resids}:
                    # Only list the residue numbers that belong to this
                    # chain; otherwise equally numbered residues of one
                    # chain would be selected because of another chain.
                    resnums = "+".join(
                        str(r.resid[1]) for r in resids if r.chain == chain)
                    sel.append("( %{} and chain {} and resi {}) ".format(
                        obj_name, chain, resnums))
                selections += "select {}, ".format(
                    d + "_" + obj_name) + " or ".join(sel) + "\n"

        pymol_cmd = 'hide all\n'
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'set cartoon_ring_mode\n'
        pymol_cmd += 'set cartoon_tube_radius, .3\n'
        if args.only_elements is not None:
            pymol_cmd += "hide all\n"

            for constraint in args.only_elements.split(','):
                color = pp.get_element_color(constraint)

                # Highlight the element in every loaded RNA. The residue
                # number is read the same way as for the selections above.
                for rna in rnas:
                    for r in rna.define_residue_num_iterator(constraint,
                                                             seq_ids=True):
                        resnum = r.resid[1]
                        pymol_cmd += "show sticks, resi %r\n" % (resnum,)
                        pymol_cmd += "color %s, resi %r\n" % (color, resnum)

        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        pymol_cmd += selections
        if args.output is not None:
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (args.output)
        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)

        # In batch mode PyMOL is started with '-cq'.
        pymol_call = ['pymol']
        if args.batch:
            pymol_call.append('-cq')
        pymol_call += pdb_fns + [pml_filename]
        p = sp.Popen(pymol_call, stdout=sp.PIPE, stderr=sp.PIPE)
        log.info("Now opening pymol")
        # communicate() blocks until PyMOL exits, so the temporary files
        # stay available for the whole PyMOL session.
        out, err = p.communicate()
        log.info("Out=\n%s", out)
        log.info("Errt=\n%s", err)
예제 #21
0
def main():
    """Identify and classify pseudoknots in the RNAs given on the command line.

    Parses the command line, loads the RNAs via ``fuc.cgs_from_args`` and runs
    ``identification_pseudoknot`` and ``classify_pseudoknots`` on each of them.
    The classes found are printed to stdout and two tab-separated files are
    written to the current working directory:

    * ``pseudoknot_identification_genus2.csv``: one row per processed RNA with
      the number of pseudoknots in each class.
    * ``pseudoknot_identification_extended_genus2.csv``: the data accumulated
      by ``extend_pk_description`` for every single pseudoknot.

    RNAs that raise ``GraphIntegrityError`` or ``GraphConstructionError`` are
    logged and skipped; any other exception is logged and re-raised.
    """
    parser = fuc.get_rna_input_parser(
        "Find pseudoknots in RNA structures, "
        "classify them into shapes and analyze "
        "their 3D architecturre.",
        "+",
        parser_kwargs={"conflict_handler": "resolve"})
    # "--pseudoknots" is re-declared with a suppressed help text and forced to
    # True after parsing, so users cannot switch it off for this script.
    parser.add_argument("--pseudoknots",
                        action="store_true",
                        help=argparse.SUPPRESS)
    parser.add_argument("--outfile-mode",
                        choices=["w", "a"],
                        default='w',
                        help="Overwrite ('w') or append ('a') to output file")
    parser.add_argument("--minlength",
                        type=int,
                        help="Minimum length of each stem. "
                        "Stems with fewer base-pairs are treated as unpaired.",
                        default=2)

    args = parser.parse_args()
    args.pseudoknots = True
    rnas, filenames = fuc.cgs_from_args(args,
                                        rna_type="any",
                                        return_filenames=True,
                                        skip_errors=True)

    # Counters for the summary printed at the end; they are threaded through
    # identification_pseudoknot, which returns the updated values.
    unfold = 0
    without_pk = 0
    with_pk = 0
    pseudoknot_dataset = []
    pseudoknot_dataset_extended = defaultdict(list)

    for pos, rna in enumerate(rnas):
        try:
            without_pk, with_pk, unfold, pseudoknots = identification_pseudoknot(
                rna, without_pk, with_pk, unfold, args.minlength)

            pk_classes, other = classify_pseudoknots(pseudoknots)

            print("pk_classes:")
            for key, pks in sorted(pk_classes.items()):
                print("{:<17s} {}\t{}".format(key, len(pks),
                                              ", ".join(map(str, pks))))
            print("other:" + "\t" + ", ".join(map(str, other)))

            filename = str(filenames[pos]).split("/")[-1]

            # Per-RNA summary row: pseudoknot count per class plus metadata.
            entry = {key: len(pks) for key, pks in pk_classes.items()}
            entry["other"] = len(other)
            entry["PK_other_structures"] = ",".join(map(str, other))
            entry["filename"] = filename
            entry["rnaname"] = rna.name
            print(filename)
            pseudoknot_dataset.append(entry)

            # pk_id numbers the pseudoknots of this RNA consecutively,
            # classified ones first, then the unclassified ("other") ones.
            pk_id = 0
            for key, pks in pk_classes.items():
                for pk in pks:
                    pk_id += 1
                    extend_pk_description(pseudoknot_dataset_extended,
                                          filename, key, rna, pk, pk_id)

            for pk in other:
                pk_id += 1
                extend_pk_description(pseudoknot_dataset_extended, filename,
                                      "other", rna, pk, pk_id)
        except GraphIntegrityError:
            log.exception("Ignoring RNA %s; GraphIntegrityError", rna.name)
        except GraphConstructionError:
            log.exception("Ignoring RNA %s; GraphConstructionError", rna.name)
        except Exception:
            log.error("Error processing %s", rna.name)
            raise

    # When appending to an existing file, the header row is not repeated.
    write_header = args.outfile_mode != "a"

    df1 = pandas.DataFrame(pseudoknot_dataset)
    df1.to_csv("pseudoknot_identification_genus2.csv",
               mode=args.outfile_mode,
               header=write_header,
               sep="\t")

    df2 = pandas.DataFrame(pseudoknot_dataset_extended)
    df2.to_csv("pseudoknot_identification_extended_genus2.csv",
               mode=args.outfile_mode,
               header=write_header,
               sep="\t")

    print("Structures with Pseudoknots: {}".format(with_pk))
    print("Structures without Pseudoknots: {}".format(without_pk))
    print("Structures unfold: {}".format(unfold))
예제 #22
0
def main(args):
    """Display the given RNAs in PyMOL.

    Loads the RNAs via ``fuc.cgs_from_args``, adds each of them to the
    printer returned by ``pymol_printer_from_args``, writes the resulting
    PyMOL script plus one ``.cif`` file per RNA that has chains, and starts
    ``pymol`` on these files, waiting for it to finish.

    :param args: Parsed command-line arguments. The attributes read here are
                 ``align``, ``labels``, ``pymol_file``, ``only_elements``,
                 ``output`` and ``batch``.
    :raises ValueError: If an entry of ``args.labels`` is not of the form
                        ``element:label``.
    """
    rnas = fuc.cgs_from_args(args, '+', '3d')
    pp = pymol_printer_from_args(args)

    if args.align:
        align_rnas(rnas)
    if args.labels:
        labels = {}
        for label in args.labels.split(","):
            if not label:
                continue
            try:
                elem, lab = label.split(':')
            except ValueError:
                raise ValueError(
                    "Please specify --labels with as list of colon-seperated tuples. Found invalid entry {}.".format(repr(label)))
            labels[elem] = lab
        if not pp.print_text:
            # Text output was off: switch it on, but give every element
            # without an explicit label an empty one.
            labels = defaultdict(str, labels)
            pp.print_text = True
    else:
        labels = {}

    color_modifier = 1.0
    log.info("Visualizing %s rnas", len(rnas))
    for rna in rnas:
        pp.add_cg(rna, labels, color_modifier)
        # Each further RNA gets a smaller color modifier.
        color_modifier *= 0.7

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        if args.pymol_file:
            stru_filename = args.pymol_file
        else:
            stru_filename = os.path.join(tmpdir, "structure")
        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())

        pdb_fns = []
        selections = ""
        for i, rna in enumerate(rnas):
            if not rna.chains:
                continue
            obj_name = "pdb{}_{}".format(i, rna.name.replace("-", "_"))
            fn = os.path.join(tmpdir, obj_name + ".cif")
            pdb_fns.append(fn)
            ftup.output_multiple_chains(rna.chains.values(), fn, "cif")
            # One named PyMOL selection per coarse-grained element.
            for d in rna.defines:
                resids = list(
                    rna.define_residue_num_iterator(d, seq_ids=True))
                if not resids:
                    continue
                sel = []
                for c in {r.chain for r in resids}:
                    # Only list the residues that belong to chain c.
                    # Otherwise the selection would also match unrelated
                    # residues of this chain that happen to share a number
                    # with a residue of another chain.
                    resi = "+".join(
                        str(r.resid[1]) for r in resids if r.chain == c)
                    sel.append("( %{} and chain {} and resi {}) ".format(
                        obj_name, c, resi))
                selections += "select {}, ".format(
                    d + "_" + obj_name) + " or ".join(sel) + "\n"

        pymol_cmd = 'hide all\n'
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'set cartoon_ring_mode\n'
        pymol_cmd += 'set cartoon_tube_radius, .3\n'
        if args.only_elements is not None:
            pymol_cmd += "hide all\n"

            for constraint in args.only_elements.split(','):
                color = pp.get_element_color(constraint)
                found = False
                # Look the element up in every loaded RNA (the former code
                # referenced an undefined variable here).
                for rna in rnas:
                    if constraint not in rna.defines:
                        continue
                    found = True
                    for r in rna.define_residue_num_iterator(constraint,
                                                             seq_ids=True):
                        # Same residue number as used for the selections.
                        resnum = r.resid[1]
                        pymol_cmd += "show sticks, resi %r\n" % (resnum,)
                        pymol_cmd += "color %s, resi %r\n" % (color, resnum)
                if not found:
                    log.warning("Element %s is not part of any loaded RNA.",
                                constraint)

        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        pymol_cmd += selections
        if args.output is not None:
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (args.output)
        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)

        # In batch mode, pymol is additionally started with the "-cq" flags.
        pymol_call = ['pymol']
        if args.batch:
            pymol_call.append('-cq')
        p = sp.Popen(pymol_call + pdb_fns + [pml_filename],
                     stdout=sp.PIPE, stderr=sp.PIPE)
        log.info("Now opening pymol")
        out, err = p.communicate()
        log.info("Out=\n%s", out)
        log.info("Errt=\n%s", err)
예제 #23
0
import logging


def get_parser():
    """Return the command-line parser of this script.

    The parser is obtained from ``fuc.get_rna_input_parser`` with the
    description "Create stats", ``nargs='+'`` and ``rna_type="only_cg"``.
    """
    return fuc.get_rna_input_parser("Create stats",
                                    nargs='+',
                                    rna_type="only_cg")


# Module-level parser instance, created once when this module is loaded.
parser = get_parser()

if __name__ == "__main__":
    next_id = defaultdict(int)
    args = parser.parse_args()
    cgs = fuc.cgs_from_args(args, "only_cg")
    for cg in cgs:
        if sys.stderr.isatty():
            print(cg.name, file=sys.stderr)
        for elem in cg.defines.keys():
            if elem in cg.incomplete_elements:
                print("Skipping element", elem, file=sys.stderr)
                continue
            base_name = "{}:{}_".format(cg.name, elem[0])
            for stat in cg.get_stats(elem):
                idnr = next_id[base_name]
                next_id[base_name] += 1
                name = base_name + str(idnr)
                stat.pdb_name = name
                if elem.startswith("m"):
                    try: