Example #1
0
 def __init__(self, args, config, job_queue):
     """
         Store the CLI args/config and build an empty command graph.

         Ensures args.temp_directory exists before any jobs run.
     """
     # exist_ok replaces the old bare ``except: pass``, which swallowed
     # every mkdir failure (bad path, permissions), not just
     # "directory already exists".
     os.makedirs(args.temp_directory, exist_ok=True)
     self._job_queue = job_queue
     self._command_graph = CommandGraph(job_queue)
     self.args = args
     self.config = config
Example #2
0
 def test_two_depends_on(self):
     """A node with two prerequisites runs only after both complete."""
     queue = JobQueue(1)
     graph = CommandGraph(queue)
     queue.set_command_graph(graph)
     sleeper = CommandNode("sleep 1", "1")
     writer_one = CommandNode("echo hello", "2", stdout="test.txt")
     writer_two = CommandNode("echo hello", "3", stdout="test2.txt")
     graph.add_node(command_node=sleeper, depends_on=[writer_two, writer_one])
     graph.add_node(command_node=writer_two, depends_on=[writer_one])
     graph.start()
     graph.finish_block()
     # Clean up the files the echo commands produced.
     os.remove("test.txt")
     os.remove("test2.txt")
Example #3
0
 def test_basic_depends_on(self):
     """A dependent command runs after its single prerequisite."""
     queue = JobQueue(1)
     graph = CommandGraph(queue)
     queue.set_command_graph(graph)
     sleeper = CommandNode("sleep 1", "1")
     writer = CommandNode("echo hello", "2", stdout="test.txt")
     graph.add_node(command_node=sleeper, depends_on=[writer])
     # Both nodes should be registered under their ids.
     assert set(graph.nodes()) == set(['1', '2'])
     graph.start()
     graph.finish_block()
     # Clean up the file produced by the echo command.
     os.remove('test.txt')
Example #4
0
 def test_two_depends_on(self):
     """A node with two prerequisites runs only after both complete."""
     queue = JobQueue(1)
     graph = CommandGraph(queue)
     queue.set_command_graph(graph)
     sleeper = CommandNode("sleep 1", "1")
     first_writer = CommandNode("echo hello", "2", stdout="test.txt")
     second_writer = CommandNode("echo hello", "3", stdout="test2.txt")
     graph.add_node(command_node=sleeper, depends_on=[second_writer, first_writer])
     graph.add_node(command_node=second_writer, depends_on=[first_writer])
     graph.start()
     graph.finish_block()
     # Clean up the files the echo commands produced.
     os.remove("test.txt")
     os.remove("test2.txt")
 def __init__(self, args, config, job_queue):
     """
         Store the CLI args/config and build an empty command graph.

         Ensures args.temp_directory exists before any jobs run.
     """
     # exist_ok replaces the old bare ``except: pass``, which swallowed
     # every mkdir failure (bad path, permissions), not just
     # "directory already exists".
     os.makedirs(args.temp_directory, exist_ok=True)
     self._job_queue = job_queue
     self._command_graph = CommandGraph(job_queue)
     self.args = args
     self.config = config
Example #6
0
 def test_basic_depends_on(self):
     """A dependent command runs after its single prerequisite."""
     queue = JobQueue(1)
     graph = CommandGraph(queue)
     queue.set_command_graph(graph)
     sleeper = CommandNode("sleep 1", "1")
     writer = CommandNode("echo hello", "2", stdout="test.txt")
     graph.add_node(command_node=sleeper, depends_on=[writer])
     # Both nodes should be registered under their ids.
     assert set(graph.nodes()) == set(['1', '2'])
     graph.start()
     graph.finish_block()
     # Clean up the file produced by the echo command.
     os.remove('test.txt')
Example #7
0
class CreateNGaDNAPGraph(object):
    """
        Represents NGADNAGRaph.

        Processes the command-line options and arguments
        to generate a custom command graph that will then be run.
    """

    def _get_bam_list(self, args):
        """
            Group args.fastq_files into lists keyed by sample name.

            The sample name is the text before the first '.' and then
            before the first '_' of each file name.
        """
        fastq_pairs = {}
        for fastq in args.fastq_files:
            sample = fastq.split('.')[0]
            # NOTE(review): assumes the sample name precedes the first
            # '.' or '_' in the file name -- confirm against real inputs.
            sample = sample.split('_')[0]
            fastq_pairs.setdefault(sample, []).append(fastq)
        return fastq_pairs

    def add_node(self, node, dependencies):
        """
            Add node to the command graph with its dependencies.
        """
        self._command_graph.add_node(command_node=node,
                                     depends_on=dependencies)

    def _populate_gvcfs(self, args, config, bam_dependencies):
        """
            Generate the GVCF files.

            At this point in the process we should have BAM files for
            the entire dataset; we need to extract the sequence of
            interest from each of the BAM files that survive the
            analysis.  Not yet implemented.
        """

    def _populate_align(self, args, config):
        """
            Build the alignment portion of the command graph.

            For each fastq pair, adds an adapter-removal -> bwa aln ->
            samse -> sort -> mark-duplicates -> read-groups ->
            (map damage) -> ancient-filter chain, plus an optional
            unmerged-reads lane that is merged back in.  Returns the
            list of final nodes downstream steps should depend on.
        """
        bam_list = self._get_bam_list(args)
        logging.info("Started populating Alignments Graph")
        bam_dependencies = []
        for fastqs in bam_list.values():
            fq1 = fastqs[0]
            fq2 = fastqs[1]
            if not args.ancient_dna:
                # Only the ancient-DNA pipeline is implemented so far.
                continue
            trim_node = adapter_removal(config, args, fq1, fq2)
            # --- merged (collapsed) reads lane ---
            bwa_node = bwa_aln(args, config, fq1 + '.collapsed')
            self.add_node(bwa_node, [trim_node])
            bwa_samse1 = bwa_samse(args, config, bwa_node.stdout,
                                   fq1 + '.collapsed')
            self.add_node(bwa_samse1, [bwa_node])
            sam_sort = sort_sam(args, config, bwa_samse1.stdout)
            self.add_node(sam_sort, [bwa_samse1])
            picard_md_one = picard_md(args, config, sam_sort.stdout)
            self.add_node(picard_md_one, [sam_sort])
            picard_read_groups = picard_rg(args, config,
                                           picard_md_one.stdout)
            self.add_node(picard_read_groups, [picard_md_one])
            if not args.no_map_damage:
                map_damage_data = map_damage(args, config,
                                             picard_md_one.stdout)
                self.add_node(map_damage_data, [picard_md_one])
                anc_filter = ancient_filter(args, config,
                                            map_damage_data.stdout)
                self.add_node(anc_filter, [map_damage_data])
            else:
                anc_filter = ancient_filter(args, config,
                                            picard_read_groups.stdout)
                self.add_node(anc_filter, [picard_read_groups])
            bam_dependencies.append(anc_filter)

            if args.use_unmerged_reads:
                # --- unmerged mate-pair lane ---
                bwa_node2 = bwa_aln(args, config, fq1 + '.p1')
                self.add_node(bwa_node2, [trim_node])
                bwa_node3 = bwa_aln(args, config, fq2 + '.p2')
                self.add_node(bwa_node3, [trim_node])
                bwa_sampe1 = bwa_sampe(args, config, bwa_node2.stdout,
                                       bwa_node3.stdout, fq1 + '.p1',
                                       fq2 + '.p2')
                self.add_node(bwa_sampe1, [bwa_node2, bwa_node3])
                sam_sort1 = sort_sam(args, config, bwa_sampe1.stdout)
                self.add_node(sam_sort1, [bwa_sampe1])
                filter_unique = filter_unique_bam(args, config,
                                                  sam_sort1.stdout)
                self.add_node(filter_unique, [sam_sort1])
                # NOTE(review): reads picard_md_one.stdout but depends
                # on filter_unique -- looks like it should read
                # filter_unique.stdout; confirm before changing.
                picard_read_groups1 = picard_rg(args, config,
                                                picard_md_one.stdout)
                self.add_node(picard_read_groups1, [filter_unique])
                if not args.no_map_damage:
                    map_damage_data1 = map_damage(args, config,
                                                  filter_unique.stdout)
                    self.add_node(map_damage_data1, [filter_unique])
                    # BUG FIX: this previously rebound ``anc_filter``,
                    # clobbering the merged-lane filter and leaving
                    # ``anc_filter1`` undefined for merge_bams below
                    # (NameError at runtime).
                    anc_filter1 = ancient_filter(args, config,
                                                 map_damage_data1.stdout)
                    self.add_node(anc_filter1, [map_damage_data1])
                else:
                    anc_filter1 = ancient_filter(
                        args, config, picard_read_groups1.stdout)
                    self.add_node(anc_filter1, [picard_read_groups1])
                merge_bam = merge_bams(args, config, anc_filter.stdout,
                                       anc_filter1.stdout)
                self.add_node(merge_bam, [anc_filter1, anc_filter])
                # Record the merged BAM as the lane's final product.
                # (Previously this re-appended ``anc_filter``, which is
                # already in the list above; the old ``else`` branch
                # called ``bam_dependencies.append()`` with no argument,
                # a guaranteed TypeError -- the merged-lane filter is
                # already recorded, so no append is needed there.)
                bam_dependencies.append(merge_bam)
        return bam_dependencies

    @property
    def command_graph(self):
        """The underlying CommandGraph instance."""
        return self._command_graph

    def __init__(self, args, config, job_queue):
        """
            Store the CLI args/config and build an empty command graph.

            Ensures args.temp_directory exists before any jobs run.
        """
        # exist_ok replaces the old bare ``except: pass``, which
        # swallowed every mkdir failure, not just "already exists".
        os.makedirs(args.temp_directory, exist_ok=True)
        self._job_queue = job_queue
        self._command_graph = CommandGraph(job_queue)
        self.args = args
        self.config = config

    def populate(self):
        """Build the full command graph: alignments, then GVCFs."""
        bam_dependencies = self._populate_align(self.args, self.config)
        self._populate_gvcfs(self.args, self.config, bam_dependencies)

    def run(self):
        """Start executing the graph and block until it finishes."""
        self._command_graph.start()
        self._command_graph.finish_block()
class CreateNGaDNAPGraph(object):
    """
        Represents NGADNAGRaph.

        Processes the command-line options and arguments
        to generate a custom command graph that will then be run.
    """

    def _get_bam_list(self, args):
        """
            Group args.fastq_files into lists keyed by sample name.

            The sample name is the text before the first '.' and then
            before the first '_' of each file name.
        """
        fastq_pairs = {}
        for fastq in args.fastq_files:
            sample = fastq.split('.')[0]
            # NOTE(review): assumes the sample name precedes the first
            # '.' or '_' in the file name -- confirm against real inputs.
            sample = sample.split('_')[0]
            fastq_pairs.setdefault(sample, []).append(fastq)
        return fastq_pairs

    def add_node(self, node, dependencies):
        """
            Add node to the command graph with its dependencies.
        """
        self._command_graph.add_node(command_node=node,
                                     depends_on=dependencies)

    def _populate_gvcfs(self, args, config, bam_dependencies):
        """
            Generate the GVCF files.

            At this point in the process we should have BAM files for
            the entire dataset; we need to extract the sequence of
            interest from each of the BAM files that survive the
            analysis.  Not yet implemented.
        """

    def _populate_align(self, args, config):
        """
            Build the alignment portion of the command graph.

            For each fastq pair, adds an adapter-removal -> bwa aln ->
            samse -> sort -> mark-duplicates -> read-groups ->
            (map damage) -> ancient-filter chain, plus an optional
            unmerged-reads lane that is merged back in.  Returns the
            list of final nodes downstream steps should depend on.
        """
        bam_list = self._get_bam_list(args)
        logging.info("Started populating Alignments Graph")
        bam_dependencies = []
        for fastqs in bam_list.values():
            fq1 = fastqs[0]
            fq2 = fastqs[1]
            if not args.ancient_dna:
                # Only the ancient-DNA pipeline is implemented so far.
                continue
            trim_node = adapter_removal(config, args, fq1, fq2)
            # --- merged (collapsed) reads lane ---
            bwa_node = bwa_aln(args, config, fq1 + '.collapsed')
            self.add_node(bwa_node, [trim_node])
            bwa_samse1 = bwa_samse(args, config, bwa_node.stdout,
                                   fq1 + '.collapsed')
            self.add_node(bwa_samse1, [bwa_node])
            sam_sort = sort_sam(args, config, bwa_samse1.stdout)
            self.add_node(sam_sort, [bwa_samse1])
            picard_md_one = picard_md(args, config, sam_sort.stdout)
            self.add_node(picard_md_one, [sam_sort])
            picard_read_groups = picard_rg(args, config,
                                           picard_md_one.stdout)
            self.add_node(picard_read_groups, [picard_md_one])
            if not args.no_map_damage:
                map_damage_data = map_damage(args, config,
                                             picard_md_one.stdout)
                self.add_node(map_damage_data, [picard_md_one])
                anc_filter = ancient_filter(args, config,
                                            map_damage_data.stdout)
                self.add_node(anc_filter, [map_damage_data])
            else:
                anc_filter = ancient_filter(args, config,
                                            picard_read_groups.stdout)
                self.add_node(anc_filter, [picard_read_groups])
            bam_dependencies.append(anc_filter)

            if args.use_unmerged_reads:
                # --- unmerged mate-pair lane ---
                bwa_node2 = bwa_aln(args, config, fq1 + '.p1')
                self.add_node(bwa_node2, [trim_node])
                bwa_node3 = bwa_aln(args, config, fq2 + '.p2')
                self.add_node(bwa_node3, [trim_node])
                bwa_sampe1 = bwa_sampe(args, config, bwa_node2.stdout,
                                       bwa_node3.stdout, fq1 + '.p1',
                                       fq2 + '.p2')
                self.add_node(bwa_sampe1, [bwa_node2, bwa_node3])
                sam_sort1 = sort_sam(args, config, bwa_sampe1.stdout)
                self.add_node(sam_sort1, [bwa_sampe1])
                filter_unique = filter_unique_bam(args, config,
                                                  sam_sort1.stdout)
                self.add_node(filter_unique, [sam_sort1])
                # NOTE(review): reads picard_md_one.stdout but depends
                # on filter_unique -- looks like it should read
                # filter_unique.stdout; confirm before changing.
                picard_read_groups1 = picard_rg(args, config,
                                                picard_md_one.stdout)
                self.add_node(picard_read_groups1, [filter_unique])
                if not args.no_map_damage:
                    map_damage_data1 = map_damage(args, config,
                                                  filter_unique.stdout)
                    self.add_node(map_damage_data1, [filter_unique])
                    # BUG FIX: this previously rebound ``anc_filter``,
                    # clobbering the merged-lane filter and leaving
                    # ``anc_filter1`` undefined for merge_bams below
                    # (NameError at runtime).
                    anc_filter1 = ancient_filter(args, config,
                                                 map_damage_data1.stdout)
                    self.add_node(anc_filter1, [map_damage_data1])
                else:
                    anc_filter1 = ancient_filter(
                        args, config, picard_read_groups1.stdout)
                    self.add_node(anc_filter1, [picard_read_groups1])
                merge_bam = merge_bams(args, config, anc_filter.stdout,
                                       anc_filter1.stdout)
                self.add_node(merge_bam, [anc_filter1, anc_filter])
                # Record the merged BAM as the lane's final product.
                # (Previously this re-appended ``anc_filter``, which is
                # already in the list above; the old ``else`` branch
                # called ``bam_dependencies.append()`` with no argument,
                # a guaranteed TypeError -- the merged-lane filter is
                # already recorded, so no append is needed there.)
                bam_dependencies.append(merge_bam)
        return bam_dependencies

    @property
    def command_graph(self):
        """The underlying CommandGraph instance."""
        return self._command_graph

    def __init__(self, args, config, job_queue):
        """
            Store the CLI args/config and build an empty command graph.

            Ensures args.temp_directory exists before any jobs run.
        """
        # exist_ok replaces the old bare ``except: pass``, which
        # swallowed every mkdir failure, not just "already exists".
        os.makedirs(args.temp_directory, exist_ok=True)
        self._job_queue = job_queue
        self._command_graph = CommandGraph(job_queue)
        self.args = args
        self.config = config

    def populate(self):
        """Build the full command graph: alignments, then GVCFs."""
        bam_dependencies = self._populate_align(self.args, self.config)
        self._populate_gvcfs(self.args, self.config, bam_dependencies)

    def run(self):
        """Start executing the graph and block until it finishes."""
        self._command_graph.start()
        self._command_graph.finish_block()
Example #9
0
 def test_create_graph(self):
     """Exercise edge add/remove and adjacency queries on the graph."""
     queue = JobQueue(1)
     graph = CommandGraph(queue)
     for src, dst in [("n1", "n2"), ("n1", "n4"), ("n2", "n3")]:
         graph.add(src, dst)
     assert set(graph._graph.keys()) == set(["n1", "n2"])
     graph.remove("n1")
     assert set(graph._graph.keys()) == set(["n2"])
     # Re-adding and removing the target leaves an empty adjacency set.
     graph.add("n1", "n4")
     graph.remove("n4")
     assert graph._graph['n1'] == set([])
     assert graph.get_adjacent('n1') == set([])
     assert graph.get_adjacent('n2') == set(["n3"])
Example #10
0
 def test_create_graph(self):
     """Exercise edge add/remove and adjacency queries on the graph."""
     queue = JobQueue(1)
     graph = CommandGraph(queue)
     for src, dst in [("n1", "n2"), ("n1", "n4"), ("n2", "n3")]:
         graph.add(src, dst)
     assert set(graph._graph.keys()) == set(["n1", "n2"])
     graph.remove("n1")
     assert set(graph._graph.keys()) == set(["n2"])
     # Re-adding and removing the target leaves an empty adjacency set.
     graph.add("n1", "n4")
     graph.remove("n4")
     assert graph._graph['n1'] == set([])
     assert graph.get_adjacent('n1') == set([])
     assert graph.get_adjacent('n2') == set(["n3"])