Example #1
    def test_above_threshold(self):
        """
        Verify that a MemoryError is raised once an allocation exceeds
        the configured memory limit.
        """
        limit = 0.05  # limit of 50 MB
        alloc_bytes = 500 * 1024 * 1024  # attempt to allocate 500 MB

        # The oversized allocation must trip the limit and raise.
        with self.assertRaises(MemoryError):
            memory_limit.set_limit(limit)
            self._assign_variable_of_size(alloc_bytes)
Example #2
    def test_below_threshold(self):
        """
        Verify that no MemoryError is raised when the allocation stays
        safely under the configured memory limit.
        """
        limit = 0.05  # limit of 50 MB
        alloc_bytes = 5 * 1024 * 1024  # allocate only 5 MB

        # A small allocation must succeed; any MemoryError is a failure.
        try:
            memory_limit.set_limit(limit)
            self._assign_variable_of_size(alloc_bytes)
        except MemoryError:
            self.fail("Unexpected MemoryError raised")
Example #3
    def test_above_threshold_multi_thread(self):
        """
        Verify that a MemoryError is raised when the limit is exceeded
        by an allocation performed on a worker thread.
        """
        limit = 0.05  # limit of 50 MB
        alloc_bytes = 500 * 1024 * 1024  # attempt to allocate 500 MB

        # Submitting the oversized allocation to a worker thread must
        # still surface a MemoryError via the future's result().
        with self.assertRaises(MemoryError):
            memory_limit.set_limit(limit)
            with ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(self._assign_variable_of_size,
                                         alloc_bytes)
                future.result(timeout=60)
Example #4
    def test_below_threshold_multi_thread(self):
        """
        Verify that no MemoryError is raised when a worker thread's
        allocation stays under the configured memory limit.
        """
        limit = 0.05  # limit of 50 MB
        alloc_bytes = 5 * 1024 * 1024  # allocate only 5 MB

        # A small allocation on a worker thread must complete cleanly;
        # any MemoryError propagated through the future is a failure.
        try:
            memory_limit.set_limit(limit)
            with ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(self._assign_variable_of_size,
                                         alloc_bytes)
                future.result(timeout=60)
        except MemoryError:
            self.fail("Unexpected MemoryError raised")
def main():
    """
    Main workflow.

    Parses command-line arguments, builds a reference genome FASTA
    (forward strand plus reverse complement), selects candidate guides,
    scores their off-target binding, and writes a ranked result CSV to
    the output directory. Also logs total runtime and memory usage.
    """
    start_time = time.time()

    # Parse command-line arguments.
    parser = init_parser()
    args = parser.parse_args()

    # Create the output directory if not already present.
    # exist_ok=True makes this a no-op when it already exists, and
    # makedirs itself raises OSError on failure — safer than the
    # previous `assert`, which is stripped under `python -O`.
    os.makedirs(args.output_dir, exist_ok=True)

    # Set logger verbosity level.
    if args.verbose:
        initialize_logger(args.output_dir)
    else:
        initialize_logger(args.output_dir, logging.ERROR)

    logger = logging.getLogger(__name__)

    # Apply a process-wide memory cap only when the user requested one.
    if args.max_memory is not None:
        memory_limit.set_limit(args.max_memory)

    # Path for the generated reference genome file (os.path.join for
    # cross-platform correctness instead of string concatenation).
    logger.info("Creating genome file...")
    genome_location = os.path.join(args.output_dir, 'Run_Genome')

    # Get the sequences in a Seq format from user fasta or genebank files.
    # A scalar copy number is expanded to one entry per genome sequence.
    logger.info("Generating target dictionary..")
    copies = [args.copy_number] * len(args.genome_sequence) if isinstance(
        args.copy_number, int) else args.copy_number
    target_dict, genome = get_sequence(args.target_sequence,
                                       args.genome_sequence, copies)

    # Write the genome plus its reverse complement so both strands are
    # available as background for off-target search.
    ref_record = SeqRecord(genome,
                           id="refgenome",
                           name="reference",
                           description="a reference background")
    ref_record = ref_record + ref_record.reverse_complement()
    SeqIO.write(ref_record, genome_location, "fasta")

    # Select the guides based on the purpose and the azimuth model.
    logger.info("Selecting initial guides..")
    guide_list = guide_generator.select_guides(target_dict, args.purpose,
                                               args.azimuth_cutoff)

    # Build and run the off-target binding strength model.
    logger.info("Initializing binding strength model..")
    model = guide_strength_calculator.initalize_model(genome_location)

    logger.info("Processing guides off-target binding...")
    results_df = guide_strength_calculator.process_guides(
        model, guide_list, num_threads=args.num_threads)

    # Generate and append per-gene rank column: after sorting by gene
    # then entropy score, guides within each gene are numbered 1..n.
    logger.info("Generating result file..")
    results_df.sort_values(by=['Gene/ORF Name', 'Entropy Score'], inplace=True)
    results_df.drop_duplicates(inplace=True)
    rank_array = []
    for gene in results_df['Gene/ORF Name'].unique():
        num_guides = len(
            results_df[results_df['Gene/ORF Name'] == gene]['Guide Sequence'])
        rank_array.extend(list(np.arange(1, num_guides + 1)))
    results_df['Rank in Target Gene'] = rank_array

    results_df.to_csv(os.path.join(args.output_dir, 'output.csv'), index=False)

    process = psutil.Process(os.getpid())

    total_time = time.time() - start_time
    minutes = int(total_time // 60)
    seconds = int(total_time % 60)
    # NOTE(review): memory_info().rss is the *current* resident set size
    # at this point, not the true peak over the run — the "max memory"
    # label in the log is approximate. Consider resource.getrusage
    # (ru_maxrss) for an actual high-water mark; verify before changing
    # the user-facing message.
    max_memory = process.memory_info().rss // (1024 * 1024)
    logger.info(
        "Finished sgRNAble run, total time elapsed (min/sec): %d:%d, max memory usage (MB): %d",
        minutes, seconds, max_memory)