コード例 #1
0
ファイル: rnafolds.py プロジェクト: scchess/gscripts
def main(options):
    """Fold every sequence in the input FASTA file and pickle the results.

    Reads ``options.fasta``, distributes ``get_folds_probabilities_single``
    over all records via DTM's parallel map, and dumps the collected results
    to a pickle file whose name is derived from the FASTA filename
    (``.fa`` -> ``.pickle``).

    Parameters
    ----------
    options : argparse/optparse namespace with at least:
        fasta : str  -- path to the input FASTA file
        fp    : str  -- fold parameters with ``_`` standing in for spaces
    """
    fafile = options.fasta
    outpick = fafile.replace(".fa", ".pickle")

    # Underscores in options.fp stand in for spaces (easier to pass on a
    # command line); restore them before handing the params to the folder.
    foldparams = options.fp.replace("_", " ")

    # dtm.map is synchronous, so the FASTA handle may be closed as soon as
    # it returns.  The original version leaked both file handles; ``with``
    # guarantees they are closed even if folding or pickling raises.
    with open(fafile, 'r') as fasta_handle:
        records = SeqIO.parse(fasta_handle, 'fasta')
        r = dtm.map(get_folds_probabilities_single, records,
                    foldparams=foldparams)

    # Pass the file positionally: cPickle (commonly used as a drop-in
    # replacement) does not accept ``file=`` as a keyword argument.
    with open(outpick, 'w') as f:
        pickle.dump(r, f)
コード例 #2
0
ファイル: dtmTestSuite.py プロジェクト: SernMoe/ffxivcraftopt
def main():
    """Run the DTM test suite and return 0.

    Exercises DTM's synchronous API (map/apply/repeat/filter), asynchronous
    API (map_async/apply_async/imap/imap_unordered), asynchronous
    interactions (testAll/waitAll/waitAny/testAny), and parameter/exception
    handling, comparing DTM results against the serial builtins.  Failures
    are logged and counted in ``countFailed``, which is reported at the end.
    """

    beginTime = time.time()
    countFailed = 0

    list1 = range(500)

    _logger.info("[%s] DTM test suite started", time.time() - beginTime)

    _logger.info("[%s] Testing worker id generation : '%s'",
                 time.time() - beginTime, dtm.getWorkerId())

    _logger.info("[%s] Testing synchronous calls...", time.time() - beginTime)
    list1r = dtm.map(mapFunc1, list1)
    list1t = list(map(mapFunc1, list1))

    if list1r != list1t:
        _logger.warning("[%s] DTM synchronous map test FAILED!",
                        time.time() - beginTime)
        countFailed += 1
    else:
        _logger.info("[%s] DTM synchronous map test successful",
                     time.time() - beginTime)

    applyTestr = dtm.apply(applyFunc1, "0123456789")
    if applyTestr != 10:
        _logger.warning("[%s] DTM synchronous apply test FAILED!",
                        time.time() - beginTime)
        countFailed += 1
    else:
        _logger.info("[%s] DTM synchronous apply test successful",
                     time.time() - beginTime)

    repeatTestr = dtm.repeat(applyFunc1, 20, "0123456789")
    repeatTestt = [10 for i in range(20)]

    if repeatTestr != repeatTestt:
        _logger.warning("[%s] DTM synchronous repeat test FAILED!",
                        time.time() - beginTime)
        countFailed += 1
    else:
        _logger.info("[%s] DTM synchronous repeat test successful",
                     time.time() - beginTime)

    filterTestr = dtm.filter(filterFunc1, list1)
    filterTestt = list(filter(filterFunc1, list1))

    if filterTestr != filterTestt:
        _logger.warning("[%s] DTM synchronous filter test FAILED!",
                        time.time() - beginTime)
        countFailed += 1
    else:
        _logger.info("[%s] DTM synchronous filter test successful",
                     time.time() - beginTime)

    ######################################################################################

    _logger.info("[%s] Testing asynchronous calls...", time.time() - beginTime)

    # An async map should not already be ready almost immediately after
    # being launched; if it is, suspect it ran synchronously.
    timeInit = time.time()
    mapAsyncReq2 = dtm.map_async(mapFunc2, list1)
    if mapAsyncReq2.ready() and time.time() - timeInit < 0.1:
        _logger.warning(
            "[%s] DTM asynchronous map test seems to have failed by time!",
            time.time() - beginTime)
    mapAsyncReq1 = dtm.map_async(mapFunc1, list1)

    # mapFunc2 is assumed to triple its argument -- TODO confirm against
    # its definition elsewhere in this file.
    list2t = [arg * 3 for arg in list1]

    mapAsyncReq1.wait()
    mapAsyncReq2.wait()

    if list1t == mapAsyncReq1.get() and list2t == mapAsyncReq2.get():
        _logger.info("[%s] DTM asynchronous map test successful",
                     time.time() - beginTime)
    else:
        _logger.warning("[%s] DTM asynchronous map test FAILED",
                        time.time() - beginTime)
        countFailed += 1

    timeInit = time.time()
    applyAsyncReq1 = dtm.apply_async(applyFunc2, 1)
    if applyAsyncReq1.ready() and time.time() - timeInit < 0.5:
        _logger.warning(
            "[%s] DTM asynchronous apply test seems to have failed by time!",
            time.time() - beginTime)
    applyAsyncReq2 = dtm.apply_async(applyFunc2, 2)
    applyAsyncReq3 = dtm.apply_async(applyFunc2, 3)

    applyAsyncReq1.wait()
    applyAsyncReq2.wait()
    applyAsyncReq3.wait()

    if applyAsyncReq1.get() == 3 and applyAsyncReq2.get(
    ) == 6 and applyAsyncReq3.get() == 9:
        _logger.info("[%s] DTM asynchronous apply test successful",
                     time.time() - beginTime)
    else:
        _logger.warning("[%s] DTM asynchronous apply test FAILED",
                        time.time() - beginTime)
        countFailed += 1

    imapObj = dtm.imap(mapFunc1, list1)
    list3r = [i for i in imapObj]

    if list3r == list1t:
        _logger.info("[%s] DTM (a)synchronous imap test successful",
                     time.time() - beginTime)
    else:
        _logger.warning("[%s] DTM (a)synchronous imap test FAILED",
                        time.time() - beginTime)
        countFailed += 1

    # imap_unordered *should* yield out of order; identical-to-sorted output
    # is suspicious (same behavior as imap) but not a hard failure.
    imapNotOrderedObj = dtm.imap_unordered(mapFunc1, list1, 50)
    list4r = [i for i in imapNotOrderedObj]
    list4r_sorted = list(sorted(list4r))

    if list4r != list4r_sorted and list4r_sorted == list1t:
        _logger.info("[%s] DTM asynchronous imap_unordered test successful",
                     time.time() - beginTime)
    elif list4r_sorted == list1t:
        _logger.info(
            "[%s] DTM asynchronous imap_unordered MAY have failed (same behavior as imap())",
            time.time() - beginTime)
    else:
        _logger.warning("[%s] DTM asynchronous imap_unordered test FAILED",
                        time.time() - beginTime)
        countFailed += 1

    ######################################################################################

    _logger.info("[%s] Testing asynchronous interactions...",
                 time.time() - beginTime)

    mapAsyncReq2 = dtm.map_async(mapFunc2, list1)
    mapAsyncReq1 = dtm.map_async(mapFunc1, list1)

    # testAll() should report False while both requests are still running.
    if dtm.testAll([mapAsyncReq2, mapAsyncReq1]):
        _logger.warning("[%s] DTM testAll() test FAILED",
                        time.time() - beginTime)
        countFailed += 1
    else:
        _logger.info("[%s] DTM testAll() test successful",
                     time.time() - beginTime)

    dtm.waitAll()

    if mapAsyncReq1.ready() and mapAsyncReq2.ready():
        _logger.info("[%s] DTM waitAll() test successful",
                     time.time() - beginTime)
    else:
        _logger.warning("[%s] DTM waitAll() test FAILED",
                        time.time() - beginTime)
        countFailed += 1

    # The short apply is expected to finish before the longer map, so
    # waitAny() should hand back the apply request first.
    applyAsyncReq1 = dtm.apply_async(applyFunc1, "0123456789")
    mapAsyncReq2 = dtm.map_async(mapFunc2, list1)

    retVal = dtm.waitAny()
    if retVal == applyAsyncReq1 and applyAsyncReq1.get(
    ) == 10 and mapAsyncReq2.ready() == False:
        _logger.info("[%s] DTM waitAny() test successful",
                     time.time() - beginTime)
    elif retVal == mapAsyncReq2 and isinstance(mapAsyncReq2.get(), list):
        _logger.info("[%s] DTM waitAny() test PROBABLY successful but weird",
                     time.time() - beginTime)
    else:
        _logger.warning("[%s] DTM waitAny() test FAILED",
                        time.time() - beginTime)
        countFailed += 1

    mapAsyncReq2.wait()

    if dtm.testAny() == mapAsyncReq2:
        _logger.info("[%s] DTM testAny() test successful",
                     time.time() - beginTime)
    else:
        _logger.warning("[%s] DTM testAny() test FAILED",
                        time.time() - beginTime)
        # Bug fix: this was the only failure branch that did not increment
        # the counter, so testAny() failures were missing from the total.
        countFailed += 1

    ######################################################################################

    _logger.info("[%s] Testing parameters and exceptions handling...",
                 time.time() - beginTime)

    applyParamPassr = dtm.apply(applyFunc4,
                                1,
                                2,
                                "abc",
                                bb={
                                    'a': 2,
                                    'b': 3,
                                    'c': 4
                                },
                                cc=range(10),
                                dd=13.37)
    applyParamPasst = applyFunc4(1,
                                 2,
                                 "abc",
                                 bb={
                                     'a': 2,
                                     'b': 3,
                                     'c': 4
                                 },
                                 cc=range(10),
                                 dd=13.37)

    if applyParamPassr == applyParamPasst:
        _logger.info("[%s] DTM parameters passing test successful",
                     time.time() - beginTime)
    else:
        _logger.warning("[%s] DTM parameters passing test FAILED",
                        time.time() - beginTime)
        countFailed += 1

    # applyFunc3 is expected to raise ZeroDivisionError on the 0 input, and
    # DTM must propagate it to the caller; reaching the ``else`` branch
    # (no exception raised) is therefore the failure case.
    try:
        applyExceptTestr = dtm.map(applyFunc3, [-2, -1, 0])
    except ZeroDivisionError:
        _logger.info("[%s] DTM exception catch test successful",
                     time.time() - beginTime)
    else:
        _logger.warning("[%s] DTM exception catch test FAILED",
                        time.time() - beginTime)
        countFailed += 1

    _logger.info("[%s] DTM test suite done with %i errors",
                 time.time() - beginTime, countFailed)

    return 0
コード例 #3
0
ファイル: fahproject.py プロジェクト: jimsnyderjr/msmbuilder
    def write_all_trajectories(self,
                               input_dir,
                               output_dir,
                               stride,
                               max_rmsd,
                               min_gens,
                               center_conformations,
                               num_proc,
                               input_style,
                               update=False):
        """
        Convert all of the trajectories in the FAH project in input_dir to
        lh5 trajectory files which will be placed in output dir.

        If the 'update' flag is set, then will use the memory object to check for
        previously converted data, and add to it (rather than reconverting everything).
        This functionality can be more cleanly called through the update_trajectories()
        method.

        Parameters
        ----------
        input_dir : str
            The directory to look for XTC/DCD files in.

        output_dir : str
            The place to write the converted lh5s

        stride : int
            The size of the stride to employ. E.g., if stride = 3, the script
            keeps every 3rd MD snapshot from the original data. Useful to throw
            away highly correlated data if snapshots were saved frequently.

        max_rmsd : float
            Throw away any data that is further than `max_rmsd` (in nm) from the
            pdb file associated with the project. This is used as a sanity check
            to prevent including, e.g. data from a simulation that is blowing up.

        min_gens : int
            Discard trajectories with fewer than `min_gens` generations.

        center_conformations : bool
            Whether to center the converted (lh5) conformations.

        num_proc : int or str
            Number of processors to employ, or the sentinel string
            'use_dtm_instead' to dispatch jobs via DTM's MPI map. Note that
            this function is typically I/O limited, so paralellism is
            unlikely to yield much gain.

        input_style : {'FAH', 'FILE'}
            If you use input_style = 'FAH', this code uses knowledge of the
            RUN*/CLONE* directory structure to yield all the CLONE directories.
            If you use input_style = 'FILE', this code uses os.walk() which is
            A LOT slower because it has to stat every file, but is capable of
            recursively searching for xtc files to arbitrary depths.

        update : bool
            If `True`, then tries to figure out what data has already been converted
            by reading the "memory state" in the provided ProjectInfo file, and only
            converts new data. If `False`, does a fresh re-convert.

        Raises
        ------
        RuntimeError
            If no conversion jobs are found under `input_dir`.

        Notes
        -----
        Since sometimes a conversion fails, we collect all trajectories at the
        end and renumber them such that they are contiguously numbered.
        """

        if update:
            # Updating only makes sense against an existing output directory.
            assert os.path.exists(output_dir)
        else:
            try:
                os.mkdir(output_dir)
            except OSError:
                logger.error('Error: The directory %s already exists',
                             output_dir)
                sys.exit(1)

        intermediate_filename_root = '_trj'  # A placeholder name

        #dtm does not play nice with OpenMP
        use_parallel_rmsd = (num_proc != 'use_dtm_instead')

        # Build one self-contained job dict per CLONE directory so the
        # mapper can run in a separate process/worker.
        jobs = []
        for i, clone_dir in enumerate(
                self.yield_xtc_directories(input_dir, input_style)):

            job = {
                'clone_dir': clone_dir,
                'output_dir': output_dir,
                'pdb_file': self.pdb_topology,
                'trajectory_number': i,
                'stride': stride,
                'max_rmsd': max_rmsd,
                'min_gens': min_gens,
                'center_conformations': center_conformations,
                'memory_check': update,
                'omp_parallel_rmsd': use_parallel_rmsd
            }
            jobs.append(job)

        if len(jobs) == 0:
            raise RuntimeError('No conversion jobs found!')

        if num_proc == 'use_dtm_instead':
            # use DTM mpi parallel map
            dtm.map(self.write_trajectory_mapper, jobs)
        elif num_proc > 1:
            # use multiprocessing
            pool = Pool(processes=num_proc)
            pool.map(self.write_trajectory_mapper, jobs)
        else:
            # use regular serial execution
            map(self.write_trajectory_mapper, jobs)

        # Rename trajectory files such that they have contiguous numbering
        logger.info(
            "Finished Generating Trajectories. Renaming them now in contiguous order"
        )
        mapping = {
        }  # document the directory changes, allowing us to update memory
        for i, filename in enumerate(sorted(os.listdir(output_dir),
                                            key=keynat)):
            path = os.path.join(output_dir, filename)
            new_path = os.path.join(output_dir, "trj%d.lh5" % i)
            os.rename(path, new_path)
            mapping[path] = new_path

        # update the memory hash to account for our renumbering
        for key in self.memory.keys():
            if key not in ['convert_parameters', 'SerializerFilename']:
                logger.info("%s --> %s", self.memory[key][0],
                            mapping[self.memory[key][0]])
                self.memory[key][0] = mapping[self.memory[key][0]]

        # save the parameters used for this run in the memory file, and write to disk
        logger.info("Generating Project File: %s", self.projectinfo_file)
        if update:
            # Best-effort removal of the stale project file; narrowed from a
            # bare ``except:`` which also swallowed KeyboardInterrupt etc.
            try:
                os.remove(self.projectinfo_file
                          )  # if we are updating, just start w fresh slate
            except OSError:
                pass

        self.memory['convert_parameters'] = (input_dir, output_dir, stride,
                                             max_rmsd, min_gens,
                                             center_conformations, num_proc,
                                             self.projectinfo_file,
                                             input_style)

        Project.CreateProjectFromDir(Filename=self.projectinfo_file,
                                     TrajFilePath=output_dir,
                                     TrajFileBaseName='trj',
                                     TrajFileType='.lh5',
                                     ConfFilename=self.pdb_topology,
                                     initial_memory=cPickle.dumps(self.memory))

        logger.info("Data converted properly.")

        return
コード例 #4
0
def main(options):
    if options.gene is None:
        #options.gene =  x = list(map(str.strip, open(base + "/lovci/projects/FOX2/FOX2_human_brain/CLIP/analysis_gsnap/bound_genes.slop.p05.t0.intron_only.txt").readlines()))[:-1]
        #if options.species == "hg19":
        #options.gene =  x = list(map(str.strip, open(base + "/lovci/projects/conservation/hg19/mammal_cons/ultra_allIntron.genes.txt").readlines()))[:-1]
        #if options.species == "mm9":
        #    options.gene =  x = list(map(str.strip, open(base + "/lovci/projects/conservation/mm9/ultra_allIntron.genes.txt").readlines()))[:-1]
        genelist = get_names(options.db)

        #print "which genes to run?... this will take awhile"

    else:
        genelist = options.gene
    #mongoport = "8585"
    #mongo = Popen(["ssh", "-L", ("%s:localhost:%s" %(mongoport, mongoport)), "oolite", "-N"])  #connect to mongo db

    if options.max_genes is not None:
        if not len(
                genelist
        ) < options.max_genes:  #already < max_genes genes in genelist
            import random
            #sample a random subset
            genelist = random.sample(genelist, options.max_genes)
    if not os.path.exists(options.outdir):
        print "Creating output directory %s" % (options.outdir)
        os.mkdir(options.outdir)
    if options.serial is True:
        for gene in genelist:
            geneLinks = fold_a_dir(gene,
                                   rewrite=options.rewrite,
                                   mfe_cutoff=options.mfe_cutoff,
                                   dir=options.dbdir,
                                   species=options.species,
                                   outdir=options.outdir)
            if geneLinks == None or len(geneLinks) == 0:
                print "There were no links found for %s" % (gene)
                continue
            if options.PET is True:
                if not options.species == "hg19":
                    print "only hg19 works for PET"
                    raise Exception

                tree = (base + "/lovci/projects/structure/hg19/PET_test/tree")
                from Bio import Phylo
                leaves = Phylo.read(tree, 'newick')
                speciesList = "-".join(
                    [i.name for i in leaves.get_terminals()])
                del leaves
                import conserved_structure
                with conserved_structure.OverlapWith(
                        geneLinks, options.proxCons,
                        options.distCons) as conservedOverlappers:

                    if conservedOverlappers.enter_ok is True:
                        conservedOverlappers = conservedOverlappers.names
                    else:
                        if geneLinks is None or len(geneLinks) == 0:
                            print "There are no links for this gene: %s" % (
                                gene)
                        else:
                            print "errors overlapping conserved regiosn with this gene %s that has this many links: %d" % (
                                gene, len(geneLinks))
                            raise Exception

                    print "There are %d Links overlapping conserved regionsin gene %s" % (
                        len(conservedOverlappers), gene)
                    for link in geneLinks:
                        if not link.name in conservedOverlappers:
                            continue
                        linkObject = conserved_structure.RNApair(link)
                        linkObject.multiZ("hg19", "hg19_46", speciesList)
                        linkObject.aliFasta(prefix=linkObject.name)
                        linkObject.PETcofold(tree=tree)
                        if linkObject.PETcofoldScore == "error":
                            print "Error finding PETcofold score for %s" % (
                                linkObject.name)
                        else:
                            print "%s has PETcofold score %f" % (
                                linkObject.name, linkObject.PETcofoldScore)
                        with conserved_structure.MongoConn(
                                "compute-2-2", 8585, 'RNAlinkDB',
                                'ConsLinks') as DBcon:
                            if DBcon.enter_ok is True:
                                DBcon = DBcon.con
                                DBcon.save(linkObject.__dict__, safe=True)
                                print "Saved %s" % (linkObject.name)
                            else:
                                print "There was a problem storing %s" % (
                                    linkObject.name)
                                raise Exception
    else:
        dtm.map(fold_a_dir,
                genelist,
                rewrite=options.rewrite,
                mfe_cutoff=options.mfe_cutoff,
                dir=options.dbdir,
                species=options.species,
                outdir=options.outdir)