예제 #1
0
def test_get_custom_prority():
    """A chromosome listed in a custom ``chrom_dict`` gets its mapped priority."""
    custom_priorities = {'AHA_1': 2, 'AHA_2': 3}
    priority = get_chromosome_priority(chrom='AHA_1',
                                       chrom_dict=custom_priorities)
    assert priority == 2
예제 #2
0
    def run(self):
        """Print queued variants until a ``None`` sentinel is received.

        When ``self.outfile`` is a string it is treated as a path and opened
        for writing with UTF-8 encoding. Every item fetched from
        ``self.task_queue`` is passed to ``print_variant`` together with a
        priority selected by ``self.mode``: ``get_chromosome_priority`` for
        ``'chromosome'``, ``get_rank_score`` for ``'score'`` and ``None``
        for any other mode.

        The output handle is closed when the loop ends, also when an
        exception escapes the loop, so a failure while printing does not
        leak the open file.
        """
        proc_name = self.name
        self.logger.info(('{0}: starting'.format(proc_name)))

        # A string outfile is a path; anything else is used as given.
        if self.outfile and isinstance(self.outfile, str):
            self.outfile = open(self.outfile, 'w+', encoding="utf-8")

        try:
            while True:
                # A task is a variant dictionary
                self.logger.debug(
                    ('{0} fetching next variant'.format(proc_name)))
                variant = self.task_queue.get()

                if self.task_queue.full():
                    self.logger.warning('Variant queue full')

                # ``None`` is the sentinel telling the printer to stop.
                if variant is None:
                    self.logger.info('All variants printed.')
                    break

                self.logger.debug("Printing variant {0}".format(
                    variant.get('variant_id', 'unknown')))

                priority = None
                if self.mode == 'chromosome':
                    priority = get_chromosome_priority(variant['CHROM'])
                elif self.mode == 'score':
                    priority = get_rank_score(variant_dict=variant)

                print_variant(variant_dict=variant, header_line=self.header,
                              priority=priority, outfile=self.outfile,
                              silent=self.silent)
        finally:
            # Close on every exit path, not only when the sentinel arrives.
            if self.outfile:
                self.outfile.close()

        return
예제 #3
0
    def run(self):
        """Print queued variants until a ``None`` sentinel is received.

        When ``self.outfile`` is a string it is treated as a path and opened
        for writing with the ``utf-8-sig`` encoding. Every item fetched from
        ``self.task_queue`` is passed to ``print_variant`` together with a
        priority selected by ``self.mode``: ``get_chromosome_priority`` for
        ``'chromosome'``, ``get_rank_score`` for ``'score'`` and ``None``
        for any other mode.

        The output handle is closed when the loop ends, also when an
        exception escapes the loop, so a failure while printing does not
        leak the open file.
        """
        proc_name = self.name
        self.logger.info(('{0}: starting'.format(proc_name)))

        # A string outfile is a path; anything else is used as given.
        if self.outfile and isinstance(self.outfile, str):
            self.outfile = open(self.outfile, 'w+', encoding="utf-8-sig")

        try:
            while True:
                # A task is a variant dictionary
                self.logger.debug(
                    ('{0} fetching next variant'.format(proc_name)))
                variant = self.task_queue.get()

                if self.task_queue.full():
                    self.logger.warning('Variant queue full')

                # ``None`` is the sentinel telling the printer to stop.
                if variant is None:
                    self.logger.info('All variants printed.')
                    break

                self.logger.debug("Printing variant {0}".format(
                    variant.get('variant_id', 'unknown')))

                priority = None
                if self.mode == 'chromosome':
                    priority = get_chromosome_priority(variant['CHROM'])
                elif self.mode == 'score':
                    priority = get_rank_score(variant_dict=variant)

                print_variant(variant_dict=variant, header_line=self.header,
                              priority=priority, outfile=self.outfile,
                              silent=self.silent)
        finally:
            # Close on every exit path, not only when the sentinel arrives.
            if self.outfile:
                self.outfile.close()

        return
예제 #4
0
def sort(variant_file, outfile, family_id, silent, position, temp_dir):
    """
    Sort the variants of a VCF file by rank score or by chromosome.

    Header lines are collected in a ``HeaderParser``; every other line is
    written to a temporary file via ``print_variant`` together with a
    priority. The temporary file is then sorted with ``sort_variants``, the
    headers are printed, and the sorted variants are printed after them.
    The temporary file is removed on every exit path, including errors.

    Args:
        variant_file: Iterable of text lines of the VCF to sort.
        outfile: Destination passed on to ``print_headers`` and
            ``print_variant``.
        family_id: Not used by this function.
        silent: Forwarded to ``print_headers``. The sorted variants are
            printed with ``silent=False``.
        position: If truthy, the priority comes from
            ``get_chromosome_priority`` on the first whitespace-separated
            field and the sort mode is ``'chromosome'``; otherwise the
            priority comes from ``get_rank_score`` and the mode is
            ``'rank'``.
        temp_dir: Directory for the temporary file; when falsy the default
            location of ``NamedTemporaryFile`` is used.
    """
    logger = logging.getLogger(__name__)
    head = HeaderParser()

    logger.info("Running GENMOD sort version {0}".format(__version__))
    start = datetime.now()
    # Create a temporary variant file for sorting
    logger.debug("Creating temporary file for sorting")
    # ``dir=None`` is the default, so a falsy temp_dir keeps the default dir.
    temp_file = NamedTemporaryFile(delete=False, dir=temp_dir or None)
    temp_file.close()

    try:
        # Reopen the temp file by name as UTF-8 text; the context manager
        # closes the handle even if parsing or printing a line fails.
        with open(temp_file.name, mode='w', encoding='utf-8',
                  errors='replace') as temp_file_handle:
            logger.debug("Temp file created")
            logger.info("Printing variants to temp file")
            nr_variants = 0
            # Print the variants with rank score in first column
            for line in variant_file:
                line = line.rstrip()
                if line.startswith('#'):
                    if line.startswith('##'):
                        head.parse_meta_data(line)
                    else:
                        head.parse_header_line(line)
                    continue

                nr_variants += 1
                if position:
                    chrom = line.split()[0]
                    priority = get_chromosome_priority(chrom)
                else:
                    priority = get_rank_score(line)

                print_variant(
                    variant_line=line,
                    priority=priority,
                    outfile=temp_file_handle
                )

        logger.info("Variants printed to temp file")
        logger.info("Nr or variants in VCF file: {0}".format(nr_variants))

        sort_mode = 'chromosome' if position else 'rank'

        logger.info("Sorting variants")
        sort_variants(
            infile=temp_file.name,
            mode=sort_mode
        )
        logger.info("Variants sorted")

        logger.debug("Printing headers")
        print_headers(
            head=head,
            outfile=outfile,
            silent=silent
        )
        logger.debug("Headers printed")

        logger.info("Printing variants")
        with open(temp_file.name, mode='r', encoding='utf-8',
                  errors='replace') as f:
            for variant_line in f:
                print_variant(
                    variant_line=variant_line,
                    outfile=outfile,
                    mode='modified',
                    silent=False
                )
        logger.debug("Variants printed")
    finally:
        # The temp file was created with delete=False, so it has to be
        # removed explicitly, also when something above raised.
        logger.info("Removing temp file")
        os.remove(temp_file.name)
        logger.debug("Temp file removed")

    logger.info("Sorting done, time for sorting: {0}".format(datetime.now()-start))
예제 #5
0
def sort(variant_file, outfile, family_id, silent, position, temp_dir):
    """
    Sort the variants of a VCF file by rank score or by chromosome.

    Header lines are collected in a ``HeaderParser``; every other line is
    written to a temporary file via ``print_variant`` together with a
    priority. The temporary file is then sorted with ``sort_variants``, the
    headers are printed, and the sorted variants are printed after them.
    If the input holds no variants, only the headers are printed and the
    process exits with status 0. The temporary file is removed on every
    exit path, including that early exit and errors.

    Args:
        variant_file: Passed to ``get_file_handle``; the result is iterated
            line by line.
        outfile: Destination passed on to ``print_headers`` and
            ``print_variant``.
        family_id: Not used by this function.
        silent: Forwarded to ``print_headers``. The sorted variants are
            printed with ``silent=False``.
        position: If truthy, the priority comes from
            ``get_chromosome_priority`` on the first whitespace-separated
            field and the sort mode is ``'chromosome'``; otherwise the
            priority comes from ``get_rank_score`` and the mode is
            ``'rank'``.
        temp_dir: Directory for the temporary file; when falsy the default
            location of ``NamedTemporaryFile`` is used.

    Raises:
        SystemExit: With code 0 when the input contains no variant lines.
    """
    head = HeaderParser()
    variant_file = get_file_handle(variant_file)
    logger.info("Running GENMOD sort version {0}".format(__version__))
    start = datetime.now()
    # Create a temporary variant file for sorting
    logger.debug("Creating temporary file for sorting")
    # ``dir=None`` is the default, so a falsy temp_dir keeps the default dir.
    temp_file = NamedTemporaryFile(delete=False, dir=temp_dir or None)
    temp_file.close()

    try:
        # Reopen the temp file by name as UTF-8 text; the context manager
        # closes the handle even if parsing or printing a line fails.
        with open(temp_file.name, mode='w', encoding='utf-8',
                  errors='replace') as temp_file_handle:
            logger.debug("Temp file created")
            logger.info("Printing variants to temp file")
            nr_variants = 0
            # Print the variants with rank score in first column
            for line in variant_file:
                line = line.rstrip()
                if line.startswith('#'):
                    if line.startswith('##'):
                        head.parse_meta_data(line)
                    else:
                        head.parse_header_line(line)
                    continue

                nr_variants += 1
                if position:
                    chrom = line.split()[0]
                    priority = get_chromosome_priority(chrom)
                else:
                    priority = get_rank_score(line)

                print_variant(variant_line=line,
                              priority=priority,
                              outfile=temp_file_handle)

        logger.info("Variants printed to temp file")
        logger.info("Nr or variants in VCF file: {0}".format(nr_variants))

        if nr_variants == 0:
            # Nothing to sort: emit the headers only. SystemExit still runs
            # the ``finally`` below, so the temp file is cleaned up.
            logger.debug("Printing headers")
            print_headers(head=head, outfile=outfile, silent=silent)
            sys.exit(0)

        sort_mode = 'chromosome' if position else 'rank'

        logger.info("Sorting variants")
        sort_variants(infile=temp_file.name, mode=sort_mode)
        logger.info("Variants sorted")

        logger.debug("Printing headers")
        print_headers(head=head, outfile=outfile, silent=silent)
        logger.debug("Headers printed")

        logger.info("Printing variants")
        with open(temp_file.name, mode='r', encoding='utf-8',
                  errors='replace') as f:
            for variant_line in f:
                print_variant(variant_line=variant_line,
                              outfile=outfile,
                              mode='modified',
                              silent=False)
        logger.debug("Variants printed")
    finally:
        # The temp file was created with delete=False, so it has to be
        # removed explicitly, also on the early exit and on errors.
        logger.info("Removing temp file")
        os.remove(temp_file.name)
        logger.debug("Temp file removed")

    logger.info("Sorting done, time for sorting: {0}".format(datetime.now() -
                                                             start))
예제 #6
0
def test_get_X_priority():
    """Chromosome 'X' with an empty ``chrom_dict`` is expected to give '23'."""
    priority = get_chromosome_priority(chrom='X', chrom_dict={})
    assert priority == '23'
예제 #7
0
def test_get_custom_prority():
    """A chromosome listed in a custom ``chrom_dict`` is expected to give '2'."""
    custom_priorities = {'AHA_1': 2, 'AHA_2': 3}
    priority = get_chromosome_priority(chrom='AHA_1',
                                       chrom_dict=custom_priorities)
    assert priority == '2'
예제 #8
0
def test_get_chr_prority():
    """Chromosome 'chr1' with an empty ``chrom_dict`` is expected to give '1'."""
    priority = get_chromosome_priority(chrom='chr1', chrom_dict={})
    assert priority == '1'
예제 #9
0
def test_get_OTHER_priority():
    """Chromosome 'GL37' with an empty ``chrom_dict`` is expected to give '26'."""
    priority = get_chromosome_priority(chrom='GL37', chrom_dict={})
    assert priority == '26'
예제 #10
0
def test_get_X_priority():
    """Check that 'X' maps to priority '23' when ``chrom_dict`` is empty."""
    expected = '23'
    observed = get_chromosome_priority(chrom='X', chrom_dict={})
    assert observed == expected
예제 #11
0
def test_get_chr_prority():
    """Check that 'chr1' maps to priority '1' when ``chrom_dict`` is empty."""
    expected = '1'
    observed = get_chromosome_priority(chrom='chr1', chrom_dict={})
    assert observed == expected
예제 #12
0
def test_get_OTHER_priority():
    """Check that 'GL37' maps to priority '26' when ``chrom_dict`` is empty."""
    expected = '26'
    observed = get_chromosome_priority(chrom='GL37', chrom_dict={})
    assert observed == expected